Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180615a' into staging

Migration pull 2018-06-15

# gpg: Signature made Fri 15 Jun 2018 16:13:17 BST
# gpg:                using RSA key 0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20180615a:
  migration: calculate expected_downtime with ram_bytes_remaining()
  migration/postcopy: Wake rate limit sleep on postcopy request
  migration: Wake rate limiting for urgent requests
  migration/postcopy: Add max-postcopy-bandwidth parameter
  migration: introduce migration_update_rates
  migration: fix counting xbzrle cache_miss_rate
  migration/block-dirty-bitmap: fix dirty_bitmap_load
  migration: Poison ramblock loops in migration
  migration: Fixes for non-migratable RAMBlocks
  typedefs: add QJSON

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hmp.c b/hmp.c
index ef93f48..f40d827 100644
--- a/hmp.c
+++ b/hmp.c
@@ -370,6 +370,9 @@
         monitor_printf(mon, "%s: %" PRIu64 "\n",
             MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
             params->xbzrle_cache_size);
+        monitor_printf(mon, "%s: %" PRIu64 "\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH),
+            params->max_postcopy_bandwidth);
     }
 
     qapi_free_MigrationParameters(params);
@@ -1676,6 +1679,10 @@
         }
         p->xbzrle_cache_size = cache_size;
         break;
+    case MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH:
+        p->has_max_postcopy_bandwidth = true;
+        visit_type_size(v, param, &p->max_postcopy_bandwidth, &err);
+        break;
     default:
         assert(0);
     }
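
For reference only (not part of the patch): with this series applied, the new parameter can be set from the HMP monitor along these lines; the value is parsed with visit_type_size() in the hunk above, so the usual size suffixes should be accepted:

    (qemu) migrate_set_parameter max-postcopy-bandwidth 32M
    (qemu) info migrate_parameters

The same knob is exposed over QMP as the 'max-postcopy-bandwidth' argument of migrate-set-parameters (see the qapi/migration.json hunks further down).
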
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
index 2e2ac6c..bc4faa1 100644
--- a/include/exec/ramlist.h
+++ b/include/exec/ramlist.h
@@ -56,8 +56,10 @@
 extern RAMList ram_list;
 
 /* Should be holding either ram_list.mutex, or the RCU lock. */
-#define  RAMBLOCK_FOREACH(block)  \
+#define  INTERNAL_RAMBLOCK_FOREACH(block)  \
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
+/* Never use the INTERNAL_ version except for defining other macros */
+#define RAMBLOCK_FOREACH(block) INTERNAL_RAMBLOCK_FOREACH(block)
 
 void qemu_mutex_lock_ramlist(void);
 void qemu_mutex_unlock_ramlist(void);
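
For illustration only (not part of the patch): a minimal, self-contained sketch of the wrapper/poison pattern the hunk above introduces; every name in it (struct item, ITEM_FOREACH, ...) is hypothetical. The public iterator is a thin wrapper over an INTERNAL_ one, so migration code can build its migratable-only loop from the internal macro and then #undef the public one, turning any stray use of the general iterator in that file into a compile error (as migration/ram.c does below with RAMBLOCK_FOREACH):

    #include <stdio.h>

    struct item { int migratable; int value; };
    static struct item items[] = { { 1, 10 }, { 0, 20 }, { 1, 30 } };

    #define INTERNAL_ITEM_FOREACH(it) \
        for ((it) = items; (it) < items + 3; (it)++)
    /* Never use the INTERNAL_ version except for defining other macros. */
    #define ITEM_FOREACH(it) INTERNAL_ITEM_FOREACH(it)

    /* What a migration-only source file would do: define its filtered loop
     * on top of the internal macro, then poison the general one.
     */
    #define ITEM_FOREACH_MIGRATABLE(it)  \
        INTERNAL_ITEM_FOREACH(it)        \
            if (!(it)->migratable) {} else
    #undef ITEM_FOREACH   /* any later ITEM_FOREACH here fails to compile */

    int main(void)
    {
        struct item *it;

        ITEM_FOREACH_MIGRATABLE(it) {
            printf("%d\n", it->value);   /* prints 10 and 30, skips 20 */
        }
        return 0;
    }
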
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 3747110..42b946c 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -27,8 +27,6 @@
 #ifndef QEMU_VMSTATE_H
 #define QEMU_VMSTATE_H
 
-typedef struct QJSON QJSON;
-
 typedef struct VMStateInfo VMStateInfo;
 typedef struct VMStateDescription VMStateDescription;
 typedef struct VMStateField VMStateField;
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 325c72d..3ec0e13 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -97,6 +97,7 @@
 typedef struct QEMUTimerListGroup QEMUTimerListGroup;
 typedef struct QBool QBool;
 typedef struct QDict QDict;
+typedef struct QJSON QJSON;
 typedef struct QList QList;
 typedef struct QNull QNull;
 typedef struct QNum QNum;
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index eeccaff..3bafbbd 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -672,6 +672,9 @@
 
     do {
         ret = dirty_bitmap_load_header(f, &s);
+        if (ret < 0) {
+            return ret;
+        }
 
         if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
             ret = dirty_bitmap_load_start(f, &s);
diff --git a/migration/migration.c b/migration/migration.c
index 1e99ec9..e1eaa97 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -82,6 +82,11 @@
 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
 
+/* Background transfer rate for postcopy; 0 means unlimited.  Note
+ * that page requests can still exceed this limit.
+ */
+#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0
+
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -659,6 +664,8 @@
     params->x_multifd_page_count = s->parameters.x_multifd_page_count;
     params->has_xbzrle_cache_size = true;
     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
+    params->has_max_postcopy_bandwidth = true;
+    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
 
     return params;
 }
@@ -1066,6 +1073,9 @@
     if (params->has_xbzrle_cache_size) {
         dest->xbzrle_cache_size = params->xbzrle_cache_size;
     }
+    if (params->has_max_postcopy_bandwidth) {
+        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
+    }
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1138,6 +1148,9 @@
         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
     }
+    if (params->has_max_postcopy_bandwidth) {
+        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
+    }
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -1887,6 +1900,16 @@
     return s->parameters.xbzrle_cache_size;
 }
 
+static int64_t migrate_max_postcopy_bandwidth(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.max_postcopy_bandwidth;
+}
+
+
 bool migrate_use_block(void)
 {
     MigrationState *s;
@@ -2226,6 +2249,7 @@
     QIOChannelBuffer *bioc;
     QEMUFile *fb;
     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    int64_t bandwidth = migrate_max_postcopy_bandwidth();
     bool restart_block = false;
     int cur_state = MIGRATION_STATUS_ACTIVE;
     if (!migrate_pause_before_switchover()) {
@@ -2280,7 +2304,12 @@
      * will notice we're in POSTCOPY_ACTIVE and not actually
      * wrap their state up here
      */
-    qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
+    /* 0 max-postcopy-bandwidth means unlimited */
+    if (!bandwidth) {
+        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
+    } else {
+        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
+    }
     if (migrate_postcopy_ram()) {
         /* Ping just for debugging, helps line traces up */
         qemu_savevm_send_ping(ms->to_dst_file, 2);
@@ -2717,8 +2746,7 @@
      * recalculate. 10000 is a small enough number for our purposes
      */
     if (ram_counters.dirty_pages_rate && transferred > 10000) {
-        s->expected_downtime = ram_counters.dirty_pages_rate *
-            qemu_target_page_size() / bandwidth;
+        s->expected_downtime = ram_counters.remaining / bandwidth;
     }
 
     qemu_file_reset_rate_limit(s->to_dst_file);
@@ -2823,6 +2851,16 @@
     qemu_mutex_unlock_iothread();
 }
 
+void migration_make_urgent_request(void)
+{
+    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
+}
+
+void migration_consume_urgent_request(void)
+{
+    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
+}
+
 /*
  * Master migration thread on the source VM.
  * It drives the migration and pumps the data down the outgoing channel.
@@ -2832,6 +2870,7 @@
     MigrationState *s = opaque;
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     MigThrError thr_error;
+    bool urgent = false;
 
     rcu_register_thread();
 
@@ -2872,7 +2911,7 @@
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
         int64_t current_time;
 
-        if (!qemu_file_rate_limit(s->to_dst_file)) {
+        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
             MigIterateState iter_state = migration_iteration_run(s);
             if (iter_state == MIG_ITERATE_SKIP) {
                 continue;
@@ -2903,10 +2942,24 @@
 
         migration_update_counters(s, current_time);
 
+        urgent = false;
         if (qemu_file_rate_limit(s->to_dst_file)) {
-            /* usleep expects microseconds */
-            g_usleep((s->iteration_start_time + BUFFER_DELAY -
-                      current_time) * 1000);
+            /* Wait for the rate-limiting delay to expire, OR for
+             * something urgent to post the semaphore.
+             */
+            int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
+            trace_migration_thread_ratelimit_pre(ms);
+            if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
+                /* We were woken by one or more urgent things but
+                 * the timedwait will have consumed one of them.
+                 * The service routine for the urgent wake will decrement
+                 * the semaphore itself for each item it consumes,
+                 * so add back the one we just consumed.
+                 */
+                qemu_sem_post(&s->rate_limit_sem);
+                urgent = true;
+            }
+            trace_migration_thread_ratelimit_post(urgent);
         }
     }
 
@@ -3042,6 +3095,9 @@
     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                       parameters.xbzrle_cache_size,
                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
+    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
+                      parameters.max_postcopy_bandwidth,
+                      DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
 
     /* Migration capabilities */
     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -3077,6 +3133,7 @@
     qemu_mutex_destroy(&ms->qemu_file_lock);
     g_free(params->tls_hostname);
     g_free(params->tls_creds);
+    qemu_sem_destroy(&ms->rate_limit_sem);
     qemu_sem_destroy(&ms->pause_sem);
     qemu_sem_destroy(&ms->postcopy_pause_sem);
     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
@@ -3110,10 +3167,12 @@
     params->has_x_multifd_channels = true;
     params->has_x_multifd_page_count = true;
     params->has_xbzrle_cache_size = true;
+    params->has_max_postcopy_bandwidth = true;
 
     qemu_sem_init(&ms->postcopy_pause_sem, 0);
     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
     qemu_sem_init(&ms->rp_state.rp_sem, 0);
+    qemu_sem_init(&ms->rate_limit_sem, 0);
     qemu_mutex_init(&ms->qemu_file_lock);
 }
 
diff --git a/migration/migration.h b/migration/migration.h
index 5af57d6..64a7b33 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -121,6 +121,11 @@
      */
     QemuMutex qemu_file_lock;
 
+    /*
+     * Used to allow urgent requests to override rate limiting.
+     */
+    QemuSemaphore rate_limit_sem;
+
     /* bytes already sent at the beginning of the current iteration */
     uint64_t iteration_initial_bytes;
     /* time at the start of current iteration */
@@ -284,4 +289,10 @@
 void dirty_bitmap_mig_before_vm_start(void);
 void init_dirty_bitmap_incoming_migration(void);
 
+#define qemu_ram_foreach_block \
+  #warning "Use qemu_ram_foreach_migratable_block in migration code"
+
+void migration_make_urgent_request(void);
+void migration_consume_urgent_request(void);
+
 #endif
diff --git a/migration/qjson.h b/migration/qjson.h
index 2978b5f..41664f2 100644
--- a/migration/qjson.h
+++ b/migration/qjson.h
@@ -13,8 +13,6 @@
 #ifndef QEMU_QJSON_H
 #define QEMU_QJSON_H
 
-typedef struct QJSON QJSON;
-
 QJSON *qjson_new(void);
 void qjson_destroy(QJSON *json);
 void json_prop_str(QJSON *json, const char *name, const char *str);
diff --git a/migration/ram.c b/migration/ram.c
index a500015..cd5f551 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,9 +159,11 @@
 
 /* Should be holding either ram_list.mutex, or the RCU lock. */
 #define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
-    RAMBLOCK_FOREACH(block)                            \
+    INTERNAL_RAMBLOCK_FOREACH(block)                   \
         if (!qemu_ram_is_migratable(block)) {} else
 
+#undef RAMBLOCK_FOREACH
+
 static void ramblock_recv_map_init(void)
 {
     RAMBlock *rb;
@@ -1139,6 +1141,25 @@
     return summary;
 }
 
+static void migration_update_rates(RAMState *rs, int64_t end_time)
+{
+    uint64_t iter_count = rs->iterations - rs->iterations_prev;
+
+    /* calculate period counters */
+    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
+                / (end_time - rs->time_last_bitmap_sync);
+
+    if (!iter_count) {
+        return;
+    }
+
+    if (migrate_use_xbzrle()) {
+        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
+            rs->xbzrle_cache_miss_prev) / iter_count;
+        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
+    }
+}
+
 static void migration_bitmap_sync(RAMState *rs)
 {
     RAMBlock *block;
@@ -1159,6 +1180,7 @@
     RAMBLOCK_FOREACH_MIGRATABLE(block) {
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
     }
+    ram_counters.remaining = ram_bytes_remaining();
     rcu_read_unlock();
     qemu_mutex_unlock(&rs->bitmap_mutex);
 
@@ -1168,9 +1190,6 @@
 
     /* more than 1 second = 1000 milliseconds */
     if (end_time > rs->time_last_bitmap_sync + 1000) {
-        /* calculate period counters */
-        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
-            / (end_time - rs->time_last_bitmap_sync);
         bytes_xfer_now = ram_counters.transferred;
 
         /* During block migration the auto-converge logic incorrectly detects
@@ -1192,16 +1211,9 @@
             }
         }
 
-        if (migrate_use_xbzrle()) {
-            if (rs->iterations_prev != rs->iterations) {
-                xbzrle_counters.cache_miss_rate =
-                   (double)(xbzrle_counters.cache_miss -
-                            rs->xbzrle_cache_miss_prev) /
-                   (rs->iterations - rs->iterations_prev);
-            }
-            rs->iterations_prev = rs->iterations;
-            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
-        }
+        migration_update_rates(rs, end_time);
+
+        rs->iterations_prev = rs->iterations;
 
         /* reset period counters */
         rs->time_last_bitmap_sync = end_time;
@@ -1536,6 +1548,7 @@
             memory_region_unref(block->mr);
             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
             g_free(entry);
+            migration_consume_urgent_request();
         }
     }
     qemu_mutex_unlock(&rs->src_page_req_mutex);
@@ -1684,6 +1697,7 @@
     memory_region_ref(ramblock->mr);
     qemu_mutex_lock(&rs->src_page_req_mutex);
     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
+    migration_make_urgent_request();
     qemu_mutex_unlock(&rs->src_page_req_mutex);
     rcu_read_unlock();
 
@@ -2516,7 +2530,7 @@
      * about dirty page logging as well.
      */
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         pages += bitmap_count_one(block->bmap,
                                   block->used_length >> TARGET_PAGE_BITS);
     }
@@ -2632,9 +2646,14 @@
 
     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
-    while ((ret = qemu_file_rate_limit(f)) == 0) {
+    while ((ret = qemu_file_rate_limit(f)) == 0 ||
+            !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
         int pages;
 
+        if (qemu_file_get_error(f)) {
+            break;
+        }
+
         pages = ram_find_and_save_block(rs, false);
         /* no more pages to send */
         if (pages == 0) {
@@ -3431,7 +3450,7 @@
 
     trace_ram_dirty_bitmap_sync_start();
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         qemu_savevm_send_recv_bitmap(file, block->idstr);
         trace_ram_dirty_bitmap_request(block->idstr);
         ramblock_count++;
diff --git a/migration/rdma.c b/migration/rdma.c
index 05aee3d..8bd7159 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -635,7 +635,7 @@
 
     assert(rdma->blockmap == NULL);
     memset(local, 0, sizeof *local);
-    qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
+    qemu_ram_foreach_migratable_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
     rdma->dest_blocks = g_new0(RDMADestBlock,
                                rdma->local_ram_blocks.nb_blocks);
diff --git a/migration/trace-events b/migration/trace-events
index 4a768ea..3f67758 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -108,6 +108,8 @@
 migration_return_path_end_after(int rp_error) "%d"
 migration_thread_after_loop(void) ""
 migration_thread_file_err(void) ""
+migration_thread_ratelimit_pre(int ms) "%d ms"
+migration_thread_ratelimit_post(int urgent) "urgent: %d"
 migration_thread_setup_complete(void) ""
 open_return_path_on_source(void) ""
 open_return_path_on_source_continue(void) ""
diff --git a/qapi/migration.json b/qapi/migration.json
index f7e10ee..1b4c1db 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -517,6 +517,9 @@
 #                     and a power of 2
 #                     (Since 2.11)
 #
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                     Defaults to 0 (unlimited).  In bytes per second.
+#                     (Since 3.0)
 # Since: 2.4
 ##
 { 'enum': 'MigrationParameter',
@@ -525,7 +528,7 @@
            'tls-creds', 'tls-hostname', 'max-bandwidth',
            'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
            'x-multifd-channels', 'x-multifd-page-count',
-           'xbzrle-cache-size' ] }
+           'xbzrle-cache-size', 'max-postcopy-bandwidth' ] }
 
 ##
 # @MigrateSetParameters:
@@ -593,6 +596,10 @@
 #                     needs to be a multiple of the target page size
 #                     and a power of 2
 #                     (Since 2.11)
+#
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                     Defaults to 0 (unlimited).  In bytes per second.
+#                     (Since 3.0)
 # Since: 2.4
 ##
 # TODO either fuse back into MigrationParameters, or make
@@ -611,7 +618,8 @@
             '*block-incremental': 'bool',
             '*x-multifd-channels': 'int',
             '*x-multifd-page-count': 'int',
-            '*xbzrle-cache-size': 'size' } }
+            '*xbzrle-cache-size': 'size',
+            '*max-postcopy-bandwidth': 'size' } }
 
 ##
 # @migrate-set-parameters:
@@ -694,6 +702,10 @@
 #                     needs to be a multiple of the target page size
 #                     and a power of 2
 #                     (Since 2.11)
+#
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                     Defaults to 0 (unlimited).  In bytes per second.
+#                     (Since 3.0)
 # Since: 2.4
 ##
 { 'struct': 'MigrationParameters',
@@ -710,7 +722,8 @@
             '*block-incremental': 'bool' ,
             '*x-multifd-channels': 'uint8',
             '*x-multifd-page-count': 'uint32',
-            '*xbzrle-cache-size': 'size' } }
+            '*xbzrle-cache-size': 'size',
+            '*max-postcopy-bandwidth': 'size'  } }
 
 ##
 # @query-migrate-parameters: