blob: 57a85424fcd760ef72a7f6e04a61f08078a073a8 [file] [log] [blame]
zhanghailiang35a6ed42016-10-27 14:42:52 +08001/*
2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
3 * (a.k.a. Fault Tolerance or Continuous Replication)
4 *
5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
6 * Copyright (c) 2016 FUJITSU LIMITED
7 * Copyright (c) 2016 Intel Corporation
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13#include "qemu/osdep.h"
zhanghailiang0b827d52016-10-27 14:42:54 +080014#include "sysemu/sysemu.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010015#include "qapi/error.h"
Markus Armbruster9af23982018-02-11 10:36:01 +010016#include "qapi/qapi-commands-migration.h"
Juan Quintela40014d82017-04-17 19:34:36 +020017#include "qemu-file-channel.h"
Juan Quintela6666c962017-04-24 20:07:27 +020018#include "migration.h"
Juan Quintela08a0aee2017-04-20 18:52:18 +020019#include "qemu-file.h"
Juan Quintela20a519a2017-04-20 14:48:46 +020020#include "savevm.h"
zhanghailiang35a6ed42016-10-27 14:42:52 +080021#include "migration/colo.h"
Juan Quintela2c9e6fe2017-04-21 14:31:22 +020022#include "block.h"
zhanghailianga91246c2016-10-27 14:42:59 +080023#include "io/channel-buffer.h"
zhanghailiang0b827d52016-10-27 14:42:54 +080024#include "trace.h"
zhanghailiang56ba83d2016-10-27 14:42:56 +080025#include "qemu/error-report.h"
zhanghailiangd89e6662016-10-27 14:43:03 +080026#include "migration/failover.h"
Zhang Chen2c9639e2017-02-28 11:01:56 -080027#include "replication.h"
Zhang Chen131b2152018-09-03 12:38:45 +080028#include "net/colo-compare.h"
29#include "net/colo.h"
Zhang Chen8e48ac92018-09-03 12:38:46 +080030#include "block/block.h"
zhanghailiang9ecff6d2018-09-03 12:38:51 +080031#include "qapi/qapi-events-migration.h"
Zhang Chenf56c0062018-09-03 12:38:53 +080032#include "qapi/qmp/qerror.h"
Zhang Chen3f6df992018-09-03 12:38:54 +080033#include "sysemu/cpus.h"
zhanghailiang7b343532018-09-03 12:38:58 +080034#include "net/filter.h"
zhanghailiang35a6ed42016-10-27 14:42:52 +080035
zhanghailianga8664ba2017-01-17 20:57:44 +080036static bool vmstate_loading;
Zhang Chen131b2152018-09-03 12:38:45 +080037static Notifier packets_compare_notifier;
zhanghailianga8664ba2017-01-17 20:57:44 +080038
zhanghailianga91246c2016-10-27 14:42:59 +080039#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
40
zhanghailiang0b827d52016-10-27 14:42:54 +080041bool migration_in_colo_state(void)
42{
43 MigrationState *s = migrate_get_current();
44
45 return (s->state == MIGRATION_STATUS_COLO);
46}
47
zhanghailiang25d0c162016-10-27 14:42:55 +080048bool migration_incoming_in_colo_state(void)
49{
50 MigrationIncomingState *mis = migration_incoming_get_current();
51
52 return mis && (mis->state == MIGRATION_STATUS_COLO);
53}
54
zhanghailiangb3f7f0c2016-10-27 14:43:05 +080055static bool colo_runstate_is_stopped(void)
56{
57 return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
58}
59
zhanghailiang9d2db372016-10-27 14:43:06 +080060static void secondary_vm_do_failover(void)
61{
62 int old_state;
63 MigrationIncomingState *mis = migration_incoming_get_current();
Zhang Chen8e48ac92018-09-03 12:38:46 +080064 Error *local_err = NULL;
zhanghailiang9d2db372016-10-27 14:43:06 +080065
zhanghailianga8664ba2017-01-17 20:57:44 +080066 /* Can not do failover during the process of VM's loading VMstate, Or
67 * it will break the secondary VM.
68 */
69 if (vmstate_loading) {
70 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
71 FAILOVER_STATUS_RELAUNCH);
72 if (old_state != FAILOVER_STATUS_ACTIVE) {
73 error_report("Unknown error while do failover for secondary VM,"
Markus Armbruster977c7362017-08-24 10:46:08 +020074 "old_state: %s", FailoverStatus_str(old_state));
zhanghailianga8664ba2017-01-17 20:57:44 +080075 }
76 return;
77 }
78
zhanghailiang9d2db372016-10-27 14:43:06 +080079 migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
80 MIGRATION_STATUS_COMPLETED);
81
Zhang Chen8e48ac92018-09-03 12:38:46 +080082 replication_stop_all(true, &local_err);
83 if (local_err) {
84 error_report_err(local_err);
85 }
86
zhanghailiang7b343532018-09-03 12:38:58 +080087 /* Notify all filters of all NIC to do checkpoint */
88 colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
89 if (local_err) {
90 error_report_err(local_err);
91 }
92
zhanghailiang9d2db372016-10-27 14:43:06 +080093 if (!autostart) {
94 error_report("\"-S\" qemu option will be ignored in secondary side");
95 /* recover runstate to normal migration finish state */
96 autostart = true;
97 }
zhanghailiangc937b9a2017-01-17 20:57:43 +080098 /*
99 * Make sure COLO incoming thread not block in recv or send,
100 * If mis->from_src_file and mis->to_src_file use the same fd,
101 * The second shutdown() will return -1, we ignore this value,
102 * It is harmless.
103 */
104 if (mis->from_src_file) {
105 qemu_file_shutdown(mis->from_src_file);
106 }
107 if (mis->to_src_file) {
108 qemu_file_shutdown(mis->to_src_file);
109 }
zhanghailiang9d2db372016-10-27 14:43:06 +0800110
111 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
112 FAILOVER_STATUS_COMPLETED);
113 if (old_state != FAILOVER_STATUS_ACTIVE) {
114 error_report("Incorrect state (%s) while doing failover for "
Markus Armbruster977c7362017-08-24 10:46:08 +0200115 "secondary VM", FailoverStatus_str(old_state));
zhanghailiang9d2db372016-10-27 14:43:06 +0800116 return;
117 }
zhanghailiangc937b9a2017-01-17 20:57:43 +0800118 /* Notify COLO incoming thread that failover work is finished */
119 qemu_sem_post(&mis->colo_incoming_sem);
zhanghailiang9d2db372016-10-27 14:43:06 +0800120 /* For Secondary VM, jump to incoming co */
121 if (mis->migration_incoming_co) {
122 qemu_coroutine_enter(mis->migration_incoming_co);
123 }
124}
125
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800126static void primary_vm_do_failover(void)
127{
128 MigrationState *s = migrate_get_current();
129 int old_state;
Zhang Chen8e48ac92018-09-03 12:38:46 +0800130 Error *local_err = NULL;
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800131
132 migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
133 MIGRATION_STATUS_COMPLETED);
134
zhanghailiangc937b9a2017-01-17 20:57:43 +0800135 /*
136 * Wake up COLO thread which may blocked in recv() or send(),
137 * The s->rp_state.from_dst_file and s->to_dst_file may use the
138 * same fd, but we still shutdown the fd for twice, it is harmless.
139 */
140 if (s->to_dst_file) {
141 qemu_file_shutdown(s->to_dst_file);
142 }
143 if (s->rp_state.from_dst_file) {
144 qemu_file_shutdown(s->rp_state.from_dst_file);
145 }
146
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800147 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
148 FAILOVER_STATUS_COMPLETED);
149 if (old_state != FAILOVER_STATUS_ACTIVE) {
150 error_report("Incorrect state (%s) while doing failover for Primary VM",
Markus Armbruster977c7362017-08-24 10:46:08 +0200151 FailoverStatus_str(old_state));
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800152 return;
153 }
Zhang Chen8e48ac92018-09-03 12:38:46 +0800154
155 replication_stop_all(true, &local_err);
156 if (local_err) {
157 error_report_err(local_err);
158 local_err = NULL;
159 }
160
zhanghailiangc937b9a2017-01-17 20:57:43 +0800161 /* Notify COLO thread that failover work is finished */
162 qemu_sem_post(&s->colo_exit_sem);
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800163}
164
Zhang Chenaad555c2018-09-03 12:38:47 +0800165COLOMode get_colo_mode(void)
166{
167 if (migration_in_colo_state()) {
168 return COLO_MODE_PRIMARY;
169 } else if (migration_incoming_in_colo_state()) {
170 return COLO_MODE_SECONDARY;
171 } else {
Zhang Chen41b6b772018-09-03 12:38:52 +0800172 return COLO_MODE_NONE;
Zhang Chenaad555c2018-09-03 12:38:47 +0800173 }
174}
175
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800176void colo_do_failover(MigrationState *s)
177{
178 /* Make sure VM stopped while failover happened. */
179 if (!colo_runstate_is_stopped()) {
180 vm_stop_force_state(RUN_STATE_COLO);
181 }
182
183 if (get_colo_mode() == COLO_MODE_PRIMARY) {
184 primary_vm_do_failover();
zhanghailiang9d2db372016-10-27 14:43:06 +0800185 } else {
186 secondary_vm_do_failover();
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800187 }
188}
189
Zhang Chen2c9639e2017-02-28 11:01:56 -0800190void qmp_xen_set_replication(bool enable, bool primary,
191 bool has_failover, bool failover,
192 Error **errp)
193{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200194#ifdef CONFIG_REPLICATION
Zhang Chen2c9639e2017-02-28 11:01:56 -0800195 ReplicationMode mode = primary ?
196 REPLICATION_MODE_PRIMARY :
197 REPLICATION_MODE_SECONDARY;
198
199 if (has_failover && enable) {
200 error_setg(errp, "Parameter 'failover' is only for"
201 " stopping replication");
202 return;
203 }
204
205 if (enable) {
206 replication_start_all(mode, errp);
207 } else {
208 if (!has_failover) {
209 failover = NULL;
210 }
211 replication_stop_all(failover, failover ? NULL : errp);
212 }
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200213#else
214 abort();
215#endif
Zhang Chen2c9639e2017-02-28 11:01:56 -0800216}
217
Zhang Chendaa33c52017-02-25 10:46:04 +0800218ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
219{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200220#ifdef CONFIG_REPLICATION
Zhang Chendaa33c52017-02-25 10:46:04 +0800221 Error *err = NULL;
222 ReplicationStatus *s = g_new0(ReplicationStatus, 1);
223
224 replication_get_error_all(&err);
225 if (err) {
226 s->error = true;
227 s->has_desc = true;
228 s->desc = g_strdup(error_get_pretty(err));
229 } else {
230 s->error = false;
231 }
232
233 error_free(err);
234 return s;
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200235#else
236 abort();
237#endif
Zhang Chendaa33c52017-02-25 10:46:04 +0800238}
239
240void qmp_xen_colo_do_checkpoint(Error **errp)
241{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200242#ifdef CONFIG_REPLICATION
Zhang Chendaa33c52017-02-25 10:46:04 +0800243 replication_do_checkpoint_all(errp);
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200244#else
245 abort();
246#endif
Zhang Chendaa33c52017-02-25 10:46:04 +0800247}
248
Zhang Chenf56c0062018-09-03 12:38:53 +0800249COLOStatus *qmp_query_colo_status(Error **errp)
250{
251 COLOStatus *s = g_new0(COLOStatus, 1);
252
253 s->mode = get_colo_mode();
254
255 switch (failover_get_state()) {
256 case FAILOVER_STATUS_NONE:
257 s->reason = COLO_EXIT_REASON_NONE;
258 break;
259 case FAILOVER_STATUS_REQUIRE:
260 s->reason = COLO_EXIT_REASON_REQUEST;
261 break;
262 default:
263 s->reason = COLO_EXIT_REASON_ERROR;
264 }
265
266 return s;
267}
268
zhanghailiang4f975582016-10-27 14:42:57 +0800269static void colo_send_message(QEMUFile *f, COLOMessage msg,
270 Error **errp)
271{
272 int ret;
273
274 if (msg >= COLO_MESSAGE__MAX) {
275 error_setg(errp, "%s: Invalid message", __func__);
276 return;
277 }
278 qemu_put_be32(f, msg);
279 qemu_fflush(f);
280
281 ret = qemu_file_get_error(f);
282 if (ret < 0) {
283 error_setg_errno(errp, -ret, "Can't send COLO message");
284 }
Markus Armbruster977c7362017-08-24 10:46:08 +0200285 trace_colo_send_message(COLOMessage_str(msg));
zhanghailiang4f975582016-10-27 14:42:57 +0800286}
287
zhanghailianga91246c2016-10-27 14:42:59 +0800288static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
289 uint64_t value, Error **errp)
290{
291 Error *local_err = NULL;
292 int ret;
293
294 colo_send_message(f, msg, &local_err);
295 if (local_err) {
296 error_propagate(errp, local_err);
297 return;
298 }
299 qemu_put_be64(f, value);
300 qemu_fflush(f);
301
302 ret = qemu_file_get_error(f);
303 if (ret < 0) {
304 error_setg_errno(errp, -ret, "Failed to send value for message:%s",
Markus Armbruster977c7362017-08-24 10:46:08 +0200305 COLOMessage_str(msg));
zhanghailianga91246c2016-10-27 14:42:59 +0800306 }
307}
308
zhanghailiang4f975582016-10-27 14:42:57 +0800309static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
310{
311 COLOMessage msg;
312 int ret;
313
314 msg = qemu_get_be32(f);
315 ret = qemu_file_get_error(f);
316 if (ret < 0) {
317 error_setg_errno(errp, -ret, "Can't receive COLO message");
318 return msg;
319 }
320 if (msg >= COLO_MESSAGE__MAX) {
321 error_setg(errp, "%s: Invalid message", __func__);
322 return msg;
323 }
Markus Armbruster977c7362017-08-24 10:46:08 +0200324 trace_colo_receive_message(COLOMessage_str(msg));
zhanghailiang4f975582016-10-27 14:42:57 +0800325 return msg;
326}
327
328static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
329 Error **errp)
330{
331 COLOMessage msg;
332 Error *local_err = NULL;
333
334 msg = colo_receive_message(f, &local_err);
335 if (local_err) {
336 error_propagate(errp, local_err);
337 return;
338 }
339 if (msg != expect_msg) {
340 error_setg(errp, "Unexpected COLO message %d, expected %d",
341 msg, expect_msg);
342 }
343}
344
zhanghailiang4291d372016-10-27 14:43:00 +0800345static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
346 Error **errp)
347{
348 Error *local_err = NULL;
349 uint64_t value;
350 int ret;
351
352 colo_receive_check_message(f, expect_msg, &local_err);
353 if (local_err) {
354 error_propagate(errp, local_err);
355 return 0;
356 }
357
358 value = qemu_get_be64(f);
359 ret = qemu_file_get_error(f);
360 if (ret < 0) {
361 error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
Markus Armbruster977c7362017-08-24 10:46:08 +0200362 COLOMessage_str(expect_msg));
zhanghailiang4291d372016-10-27 14:43:00 +0800363 }
364 return value;
365}
366
zhanghailianga91246c2016-10-27 14:42:59 +0800367static int colo_do_checkpoint_transaction(MigrationState *s,
368 QIOChannelBuffer *bioc,
369 QEMUFile *fb)
zhanghailiang4f975582016-10-27 14:42:57 +0800370{
371 Error *local_err = NULL;
zhanghailianga91246c2016-10-27 14:42:59 +0800372 int ret = -1;
zhanghailiang4f975582016-10-27 14:42:57 +0800373
374 colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
375 &local_err);
376 if (local_err) {
377 goto out;
378 }
379
380 colo_receive_check_message(s->rp_state.from_dst_file,
381 COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
382 if (local_err) {
383 goto out;
384 }
zhanghailianga91246c2016-10-27 14:42:59 +0800385 /* Reset channel-buffer directly */
386 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
387 bioc->usage = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800388
zhanghailianga91246c2016-10-27 14:42:59 +0800389 qemu_mutex_lock_iothread();
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800390 if (failover_get_state() != FAILOVER_STATUS_NONE) {
391 qemu_mutex_unlock_iothread();
392 goto out;
393 }
zhanghailianga91246c2016-10-27 14:42:59 +0800394 vm_stop_force_state(RUN_STATE_COLO);
395 qemu_mutex_unlock_iothread();
396 trace_colo_vm_state_change("run", "stop");
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800397 /*
398 * Failover request bh could be called after vm_stop_force_state(),
399 * So we need check failover_request_is_active() again.
400 */
401 if (failover_get_state() != FAILOVER_STATUS_NONE) {
402 goto out;
403 }
zhanghailianga91246c2016-10-27 14:42:59 +0800404
Zhang Chen131b2152018-09-03 12:38:45 +0800405 colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
406 if (local_err) {
407 goto out;
408 }
409
zhanghailianga91246c2016-10-27 14:42:59 +0800410 /* Disable block migration */
Juan Quintelace7c8172017-04-05 20:45:22 +0200411 migrate_set_block_enabled(false, &local_err);
zhanghailianga91246c2016-10-27 14:42:59 +0800412 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800413 replication_do_checkpoint_all(&local_err);
414 if (local_err) {
415 qemu_mutex_unlock_iothread();
416 goto out;
417 }
zhanghailiang4f975582016-10-27 14:42:57 +0800418
419 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
420 if (local_err) {
Zhang Chen3f6df992018-09-03 12:38:54 +0800421 qemu_mutex_unlock_iothread();
zhanghailiang4f975582016-10-27 14:42:57 +0800422 goto out;
423 }
Zhang Chen3f6df992018-09-03 12:38:54 +0800424 /* Note: device state is saved into buffer */
425 ret = qemu_save_device_state(fb);
426
427 qemu_mutex_unlock_iothread();
428 if (ret < 0) {
429 goto out;
430 }
431 /*
432 * Only save VM's live state, which not including device state.
433 * TODO: We may need a timeout mechanism to prevent COLO process
434 * to be blocked here.
435 */
436 qemu_savevm_live_state(s->to_dst_file);
437
438 qemu_fflush(fb);
439
zhanghailianga91246c2016-10-27 14:42:59 +0800440 /*
441 * We need the size of the VMstate data in Secondary side,
442 * With which we can decide how much data should be read.
443 */
444 colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
445 bioc->usage, &local_err);
446 if (local_err) {
447 goto out;
448 }
zhanghailiang4f975582016-10-27 14:42:57 +0800449
zhanghailianga91246c2016-10-27 14:42:59 +0800450 qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
451 qemu_fflush(s->to_dst_file);
452 ret = qemu_file_get_error(s->to_dst_file);
453 if (ret < 0) {
454 goto out;
455 }
zhanghailiang4f975582016-10-27 14:42:57 +0800456
457 colo_receive_check_message(s->rp_state.from_dst_file,
458 COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
459 if (local_err) {
460 goto out;
461 }
462
463 colo_receive_check_message(s->rp_state.from_dst_file,
464 COLO_MESSAGE_VMSTATE_LOADED, &local_err);
465 if (local_err) {
466 goto out;
467 }
468
zhanghailianga91246c2016-10-27 14:42:59 +0800469 ret = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800470
zhanghailianga91246c2016-10-27 14:42:59 +0800471 qemu_mutex_lock_iothread();
472 vm_start();
473 qemu_mutex_unlock_iothread();
474 trace_colo_vm_state_change("stop", "run");
475
zhanghailiang4f975582016-10-27 14:42:57 +0800476out:
477 if (local_err) {
478 error_report_err(local_err);
479 }
zhanghailianga91246c2016-10-27 14:42:59 +0800480 return ret;
zhanghailiang4f975582016-10-27 14:42:57 +0800481}
482
Zhang Chen131b2152018-09-03 12:38:45 +0800483static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
484{
485 colo_checkpoint_notify(data);
486}
487
zhanghailiang0b827d52016-10-27 14:42:54 +0800488static void colo_process_checkpoint(MigrationState *s)
489{
zhanghailianga91246c2016-10-27 14:42:59 +0800490 QIOChannelBuffer *bioc;
491 QEMUFile *fb = NULL;
zhanghailiang479125d2017-01-17 20:57:42 +0800492 int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
zhanghailiang4f975582016-10-27 14:42:57 +0800493 Error *local_err = NULL;
494 int ret;
495
zhanghailiangaef06082016-10-27 14:43:04 +0800496 failover_init_state();
497
zhanghailiang56ba83d2016-10-27 14:42:56 +0800498 s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
499 if (!s->rp_state.from_dst_file) {
500 error_report("Open QEMUFile from_dst_file failed");
501 goto out;
502 }
503
Zhang Chen131b2152018-09-03 12:38:45 +0800504 packets_compare_notifier.notify = colo_compare_notify_checkpoint;
505 colo_compare_register_notifier(&packets_compare_notifier);
506
zhanghailiang4f975582016-10-27 14:42:57 +0800507 /*
508 * Wait for Secondary finish loading VM states and enter COLO
509 * restore.
510 */
511 colo_receive_check_message(s->rp_state.from_dst_file,
512 COLO_MESSAGE_CHECKPOINT_READY, &local_err);
513 if (local_err) {
514 goto out;
515 }
zhanghailianga91246c2016-10-27 14:42:59 +0800516 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
517 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
518 object_unref(OBJECT(bioc));
zhanghailiang4f975582016-10-27 14:42:57 +0800519
zhanghailiang0b827d52016-10-27 14:42:54 +0800520 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800521 replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
522 if (local_err) {
523 qemu_mutex_unlock_iothread();
524 goto out;
525 }
526
zhanghailiang0b827d52016-10-27 14:42:54 +0800527 vm_start();
528 qemu_mutex_unlock_iothread();
529 trace_colo_vm_state_change("stop", "run");
530
zhanghailiang479125d2017-01-17 20:57:42 +0800531 timer_mod(s->colo_delay_timer,
532 current_time + s->parameters.x_checkpoint_delay);
533
zhanghailiang4f975582016-10-27 14:42:57 +0800534 while (s->state == MIGRATION_STATUS_COLO) {
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800535 if (failover_get_state() != FAILOVER_STATUS_NONE) {
536 error_report("failover request");
537 goto out;
538 }
539
zhanghailiang479125d2017-01-17 20:57:42 +0800540 qemu_sem_wait(&s->colo_checkpoint_sem);
zhanghailiang18cc23d2016-10-27 14:43:02 +0800541
zhanghailianga91246c2016-10-27 14:42:59 +0800542 ret = colo_do_checkpoint_transaction(s, bioc, fb);
zhanghailiang4f975582016-10-27 14:42:57 +0800543 if (ret < 0) {
544 goto out;
545 }
546 }
zhanghailiang0b827d52016-10-27 14:42:54 +0800547
zhanghailiang56ba83d2016-10-27 14:42:56 +0800548out:
zhanghailiang4f975582016-10-27 14:42:57 +0800549 /* Throw the unreported error message after exited from loop */
550 if (local_err) {
551 error_report_err(local_err);
552 }
553
zhanghailianga91246c2016-10-27 14:42:59 +0800554 if (fb) {
555 qemu_fclose(fb);
556 }
557
zhanghailiang9ecff6d2018-09-03 12:38:51 +0800558 /*
559 * There are only two reasons we can get here, some error happened
560 * or the user triggered failover.
561 */
562 switch (failover_get_state()) {
563 case FAILOVER_STATUS_NONE:
564 qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
565 COLO_EXIT_REASON_ERROR);
566 break;
567 case FAILOVER_STATUS_REQUIRE:
568 qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
569 COLO_EXIT_REASON_REQUEST);
570 break;
571 default:
572 abort();
573 }
574
zhanghailiangc937b9a2017-01-17 20:57:43 +0800575 /* Hope this not to be too long to wait here */
576 qemu_sem_wait(&s->colo_exit_sem);
577 qemu_sem_destroy(&s->colo_exit_sem);
Zhang Chen131b2152018-09-03 12:38:45 +0800578
579 /*
580 * It is safe to unregister notifier after failover finished.
581 * Besides, colo_delay_timer and colo_checkpoint_sem can't be
582 * released befor unregister notifier, or there will be use-after-free
583 * error.
584 */
585 colo_compare_unregister_notifier(&packets_compare_notifier);
586 timer_del(s->colo_delay_timer);
587 timer_free(s->colo_delay_timer);
588 qemu_sem_destroy(&s->colo_checkpoint_sem);
589
zhanghailiangc937b9a2017-01-17 20:57:43 +0800590 /*
591 * Must be called after failover BH is completed,
592 * Or the failover BH may shutdown the wrong fd that
593 * re-used by other threads after we release here.
594 */
zhanghailiang56ba83d2016-10-27 14:42:56 +0800595 if (s->rp_state.from_dst_file) {
596 qemu_fclose(s->rp_state.from_dst_file);
597 }
zhanghailiang0b827d52016-10-27 14:42:54 +0800598}
599
zhanghailiang479125d2017-01-17 20:57:42 +0800600void colo_checkpoint_notify(void *opaque)
601{
602 MigrationState *s = opaque;
603 int64_t next_notify_time;
604
605 qemu_sem_post(&s->colo_checkpoint_sem);
606 s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
607 next_notify_time = s->colo_checkpoint_time +
608 s->parameters.x_checkpoint_delay;
609 timer_mod(s->colo_delay_timer, next_notify_time);
610}
611
zhanghailiang0b827d52016-10-27 14:42:54 +0800612void migrate_start_colo_process(MigrationState *s)
613{
614 qemu_mutex_unlock_iothread();
zhanghailiang479125d2017-01-17 20:57:42 +0800615 qemu_sem_init(&s->colo_checkpoint_sem, 0);
616 s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
617 colo_checkpoint_notify, s);
618
zhanghailiangc937b9a2017-01-17 20:57:43 +0800619 qemu_sem_init(&s->colo_exit_sem, 0);
zhanghailiang0b827d52016-10-27 14:42:54 +0800620 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
621 MIGRATION_STATUS_COLO);
622 colo_process_checkpoint(s);
623 qemu_mutex_lock_iothread();
624}
zhanghailiang25d0c162016-10-27 14:42:55 +0800625
zhanghailiang4f975582016-10-27 14:42:57 +0800626static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
627 Error **errp)
628{
629 COLOMessage msg;
630 Error *local_err = NULL;
631
632 msg = colo_receive_message(f, &local_err);
633 if (local_err) {
634 error_propagate(errp, local_err);
635 return;
636 }
637
638 switch (msg) {
639 case COLO_MESSAGE_CHECKPOINT_REQUEST:
640 *checkpoint_request = 1;
641 break;
642 default:
643 *checkpoint_request = 0;
644 error_setg(errp, "Got unknown COLO message: %d", msg);
645 break;
646 }
647}
648
zhanghailiang25d0c162016-10-27 14:42:55 +0800649void *colo_process_incoming_thread(void *opaque)
650{
651 MigrationIncomingState *mis = opaque;
zhanghailiang4291d372016-10-27 14:43:00 +0800652 QEMUFile *fb = NULL;
653 QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
654 uint64_t total_size;
655 uint64_t value;
zhanghailiang4f975582016-10-27 14:42:57 +0800656 Error *local_err = NULL;
Zhang Chen3f6df992018-09-03 12:38:54 +0800657 int ret;
zhanghailiang25d0c162016-10-27 14:42:55 +0800658
Lidong Chen74637e62018-08-06 21:29:29 +0800659 rcu_register_thread();
zhanghailiangc937b9a2017-01-17 20:57:43 +0800660 qemu_sem_init(&mis->colo_incoming_sem, 0);
661
zhanghailiang25d0c162016-10-27 14:42:55 +0800662 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
663 MIGRATION_STATUS_COLO);
664
zhanghailiangaef06082016-10-27 14:43:04 +0800665 failover_init_state();
666
zhanghailiang56ba83d2016-10-27 14:42:56 +0800667 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
668 if (!mis->to_src_file) {
669 error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
670 goto out;
671 }
672 /*
673 * Note: the communication between Primary side and Secondary side
674 * should be sequential, we set the fd to unblocked in migration incoming
675 * coroutine, and here we are in the COLO incoming thread, so it is ok to
676 * set the fd back to blocked.
677 */
678 qemu_file_set_blocking(mis->from_src_file, true);
679
zhanghailiang4291d372016-10-27 14:43:00 +0800680 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
681 fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
682 object_unref(OBJECT(bioc));
683
Zhang Chen131b2152018-09-03 12:38:45 +0800684 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800685 replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
686 if (local_err) {
687 qemu_mutex_unlock_iothread();
688 goto out;
689 }
Zhang Chen131b2152018-09-03 12:38:45 +0800690 vm_start();
691 trace_colo_vm_state_change("stop", "run");
692 qemu_mutex_unlock_iothread();
693
zhanghailiang4f975582016-10-27 14:42:57 +0800694 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
695 &local_err);
696 if (local_err) {
697 goto out;
698 }
699
700 while (mis->state == MIGRATION_STATUS_COLO) {
Jeff Cody02ba9262016-10-31 17:50:44 -0400701 int request = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800702
703 colo_wait_handle_message(mis->from_src_file, &request, &local_err);
704 if (local_err) {
705 goto out;
706 }
707 assert(request);
zhanghailiang9d2db372016-10-27 14:43:06 +0800708 if (failover_get_state() != FAILOVER_STATUS_NONE) {
709 error_report("failover request");
710 goto out;
711 }
712
Zhang Chen131b2152018-09-03 12:38:45 +0800713 qemu_mutex_lock_iothread();
714 vm_stop_force_state(RUN_STATE_COLO);
715 trace_colo_vm_state_change("run", "stop");
716 qemu_mutex_unlock_iothread();
717
zhanghailiang4f975582016-10-27 14:42:57 +0800718 /* FIXME: This is unnecessary for periodic checkpoint mode */
719 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
720 &local_err);
721 if (local_err) {
722 goto out;
723 }
724
725 colo_receive_check_message(mis->from_src_file,
726 COLO_MESSAGE_VMSTATE_SEND, &local_err);
727 if (local_err) {
728 goto out;
729 }
730
Zhang Chen3f6df992018-09-03 12:38:54 +0800731 qemu_mutex_lock_iothread();
732 cpu_synchronize_all_pre_loadvm();
733 ret = qemu_loadvm_state_main(mis->from_src_file, mis);
734 qemu_mutex_unlock_iothread();
735
736 if (ret < 0) {
737 error_report("Load VM's live state (ram) error");
738 goto out;
739 }
740
zhanghailiang4291d372016-10-27 14:43:00 +0800741 value = colo_receive_message_value(mis->from_src_file,
742 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
743 if (local_err) {
744 goto out;
745 }
746
747 /*
748 * Read VM device state data into channel buffer,
749 * It's better to re-use the memory allocated.
750 * Here we need to handle the channel buffer directly.
751 */
752 if (value > bioc->capacity) {
753 bioc->capacity = value;
754 bioc->data = g_realloc(bioc->data, bioc->capacity);
755 }
756 total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
757 if (total_size != value) {
758 error_report("Got %" PRIu64 " VMState data, less than expected"
759 " %" PRIu64, total_size, value);
760 goto out;
761 }
762 bioc->usage = total_size;
763 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
zhanghailiang4f975582016-10-27 14:42:57 +0800764
765 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
766 &local_err);
767 if (local_err) {
768 goto out;
769 }
770
zhanghailiang4291d372016-10-27 14:43:00 +0800771 qemu_mutex_lock_iothread();
zhanghailianga8664ba2017-01-17 20:57:44 +0800772 vmstate_loading = true;
Zhang Chen3f6df992018-09-03 12:38:54 +0800773 ret = qemu_load_device_state(fb);
774 if (ret < 0) {
775 error_report("COLO: load device state failed");
zhanghailiang4291d372016-10-27 14:43:00 +0800776 qemu_mutex_unlock_iothread();
777 goto out;
778 }
zhanghailianga8664ba2017-01-17 20:57:44 +0800779
Zhang Chen8e48ac92018-09-03 12:38:46 +0800780 replication_get_error_all(&local_err);
781 if (local_err) {
782 qemu_mutex_unlock_iothread();
783 goto out;
784 }
785 /* discard colo disk buffer */
786 replication_do_checkpoint_all(&local_err);
787 if (local_err) {
788 qemu_mutex_unlock_iothread();
789 goto out;
790 }
791
zhanghailiang7b343532018-09-03 12:38:58 +0800792 /* Notify all filters of all NIC to do checkpoint */
793 colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
794
795 if (local_err) {
796 qemu_mutex_unlock_iothread();
797 goto out;
798 }
799
zhanghailianga8664ba2017-01-17 20:57:44 +0800800 vmstate_loading = false;
Zhang Chen131b2152018-09-03 12:38:45 +0800801 vm_start();
802 trace_colo_vm_state_change("stop", "run");
zhanghailiang4291d372016-10-27 14:43:00 +0800803 qemu_mutex_unlock_iothread();
zhanghailiang4f975582016-10-27 14:42:57 +0800804
zhanghailianga8664ba2017-01-17 20:57:44 +0800805 if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
806 failover_set_state(FAILOVER_STATUS_RELAUNCH,
807 FAILOVER_STATUS_NONE);
808 failover_request_active(NULL);
809 goto out;
810 }
811
zhanghailiang4f975582016-10-27 14:42:57 +0800812 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
813 &local_err);
814 if (local_err) {
815 goto out;
816 }
817 }
zhanghailiang25d0c162016-10-27 14:42:55 +0800818
zhanghailiang56ba83d2016-10-27 14:42:56 +0800819out:
zhanghailianga8664ba2017-01-17 20:57:44 +0800820 vmstate_loading = false;
zhanghailiang4f975582016-10-27 14:42:57 +0800821 /* Throw the unreported error message after exited from loop */
822 if (local_err) {
823 error_report_err(local_err);
824 }
825
zhanghailiang9ecff6d2018-09-03 12:38:51 +0800826 switch (failover_get_state()) {
827 case FAILOVER_STATUS_NONE:
828 qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
829 COLO_EXIT_REASON_ERROR);
830 break;
831 case FAILOVER_STATUS_REQUIRE:
832 qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
833 COLO_EXIT_REASON_REQUEST);
834 break;
835 default:
836 abort();
837 }
838
zhanghailiang4291d372016-10-27 14:43:00 +0800839 if (fb) {
840 qemu_fclose(fb);
841 }
842
zhanghailiangc937b9a2017-01-17 20:57:43 +0800843 /* Hope this not to be too long to loop here */
844 qemu_sem_wait(&mis->colo_incoming_sem);
845 qemu_sem_destroy(&mis->colo_incoming_sem);
846 /* Must be called after failover BH is completed */
zhanghailiang56ba83d2016-10-27 14:42:56 +0800847 if (mis->to_src_file) {
848 qemu_fclose(mis->to_src_file);
849 }
Zhang Chenaad555c2018-09-03 12:38:47 +0800850 migration_incoming_disable_colo();
zhanghailiang25d0c162016-10-27 14:42:55 +0800851
Lidong Chen74637e62018-08-06 21:29:29 +0800852 rcu_unregister_thread();
zhanghailiang25d0c162016-10-27 14:42:55 +0800853 return NULL;
854}