blob: 50d58c0c9cecd1beb5257eb94001d6cffa8d0e1e [file] [log] [blame]
/*
 * Multifd common functions
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12
13#ifndef QEMU_MIGRATION_MULTIFD_H
14#define QEMU_MIGRATION_MULTIFD_H
15
Fabiano Rosas90fa1212024-08-27 14:45:49 -030016#include "exec/target_page.h"
Fabiano Rosasa49d15a2024-02-29 12:30:15 -030017#include "ram.h"
18
Fabiano Rosasd117ed02024-02-29 12:30:09 -030019typedef struct MultiFDRecvData MultiFDRecvData;
Fabiano Rosasaddd7d12024-08-27 14:45:52 -030020typedef struct MultiFDSendData MultiFDSendData;
Fabiano Rosasd117ed02024-02-29 12:30:09 -030021
Fabiano Rosasbd8b0a82024-02-06 18:51:15 -030022bool multifd_send_setup(void);
Peter Xucde85c32024-02-02 18:28:55 +080023void multifd_send_shutdown(void);
Fabiano Rosasa8a3e712024-02-29 12:30:10 -030024void multifd_send_channel_created(void);
Peter Xucde85c32024-02-02 18:28:55 +080025int multifd_recv_setup(Error **errp);
26void multifd_recv_cleanup(void);
27void multifd_recv_shutdown(void);
Juan Quintelad32ca5a2020-01-22 16:16:07 +010028bool multifd_recv_all_channels_created(void);
manish.mishra6720c2b2022-12-20 18:44:18 +000029void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
Juan Quintelad32ca5a2020-01-22 16:16:07 +010030void multifd_recv_sync_main(void);
Fabiano Rosas9346fa12024-01-04 11:21:39 -030031int multifd_send_sync_main(void);
Peter Xud6556d12024-02-02 18:28:50 +080032bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
Fabiano Rosasd117ed02024-02-29 12:30:09 -030033bool multifd_recv(void);
34MultiFDRecvData *multifd_get_recv_data(void);
Juan Quintelad32ca5a2020-01-22 16:16:07 +010035
/*
 * Multifd packet flags.
 *
 * Bit 0 is the SYNC flag; bits 1-5 hold the compression method.
 */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 5 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
/* We need to stay compatible: before compression existed the value was 0 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
#define MULTIFD_FLAG_QPL (4 << 1)
#define MULTIFD_FLAG_UADK (8 << 1)
#define MULTIFD_FLAG_QATZIP (16 << 1)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
51
/*
 * Multifd packet header.
 *
 * The structure is packed, so the field order and widths below define
 * its exact byte layout; do not reorder or resize fields.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* zero pages */
    uint32_t zero_pages;
    uint32_t unused32[1];    /* Reserved for future use */
    uint64_t unused64[3];    /* Reserved for future use */
    char ramblock[256];
    /*
     * This array contains the pointers to:
     *  - normal pages (initial normal_pages entries)
     *  - zero pages (following zero_pages entries)
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
75
76typedef struct {
77 /* number of used pages */
Juan Quintela90a3d2f2021-11-22 11:51:40 +010078 uint32_t num;
Hao Xiang303e6f52024-03-11 18:00:12 +000079 /* number of normal pages */
80 uint32_t normal_num;
Juan Quintelad32ca5a2020-01-22 16:16:07 +010081 RAMBlock *block;
Fabiano Rosas0e427da2024-08-27 14:45:53 -030082 /* offset of each page */
83 ram_addr_t offset[];
Juan Quintelad32ca5a2020-01-22 16:16:07 +010084} MultiFDPages_t;
85
/* Description of a chunk of data to be received. */
struct MultiFDRecvData {
    void *opaque;
    size_t size;
    /* for preadv */
    off_t file_offset;
};
92
/* Discriminator for the payload union carried by MultiFDSendData. */
typedef enum {
    MULTIFD_PAYLOAD_NONE,
    MULTIFD_PAYLOAD_RAM,
} MultiFDPayloadType;
97
98typedef union MultiFDPayload {
99 MultiFDPages_t ram;
100} MultiFDPayload;
101
102struct MultiFDSendData {
103 MultiFDPayloadType type;
104 MultiFDPayload u;
105};
106
107static inline bool multifd_payload_empty(MultiFDSendData *data)
108{
109 return data->type == MULTIFD_PAYLOAD_NONE;
110}
111
112static inline void multifd_set_payload_type(MultiFDSendData *data,
113 MultiFDPayloadType type)
114{
115 data->type = type;
116}
117
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100118typedef struct {
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200119 /* Fields are only written at creating/deletion time */
120 /* No lock required for them, they are read only */
121
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100122 /* channel number */
123 uint8_t id;
124 /* channel thread name */
125 char *name;
126 /* channel thread id */
127 QemuThread thread;
Fabiano Rosasa2a63c42024-02-06 18:51:14 -0300128 bool thread_created;
Fabiano Rosase1921f12024-02-06 18:51:13 -0300129 QemuThread tls_thread;
130 bool tls_thread_created;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100131 /* communication channel */
132 QIOChannel *c;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200133 /* packet allocated len */
134 uint32_t packet_len;
135 /* multifd flags for sending ram */
136 int write_flags;
137
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100138 /* sem where to wait for more work */
139 QemuSemaphore sem;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200140 /* syncs main thread and channels */
141 QemuSemaphore sem_sync;
142
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100143 /* multifd flags for each packet */
144 uint32_t flags;
Peter Xuf5f48a72024-02-02 18:28:40 +0800145 /*
146 * The sender thread has work to do if either of below boolean is set.
147 *
148 * @pending_job: a job is pending
149 * @pending_sync: a sync request is pending
150 *
151 * For both of these fields, they're only set by the requesters, and
152 * cleared by the multifd sender threads.
153 */
154 bool pending_job;
155 bool pending_sync;
Fabiano Rosas9f0e1082024-08-27 14:45:54 -0300156 MultiFDSendData *data;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200157
158 /* thread local variables. No locking required */
159
160 /* pointer to the packet */
161 MultiFDPacket_t *packet;
162 /* size of the next packet that contains pages */
163 uint32_t next_packet_size;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100164 /* packets sent through this channel */
Peter Xu05b7ec12024-02-02 18:28:43 +0800165 uint64_t packets_sent;
Juan Quintela226468b2021-11-19 12:06:05 +0100166 /* buffers to send */
167 struct iovec *iov;
168 /* number of iovs used */
169 uint32_t iovs_num;
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200170 /* used for compression methods */
Fabiano Rosas402dd7a2024-02-29 12:30:06 -0300171 void *compress_data;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100172} MultiFDSendParams;
173
174typedef struct {
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200175 /* Fields are only written at creating/deletion time */
176 /* No lock required for them, they are read only */
177
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100178 /* channel number */
179 uint8_t id;
180 /* channel thread name */
181 char *name;
182 /* channel thread id */
183 QemuThread thread;
Fabiano Rosasa2a63c42024-02-06 18:51:14 -0300184 bool thread_created;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100185 /* communication channel */
186 QIOChannel *c;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200187 /* packet allocated len */
188 uint32_t packet_len;
189
190 /* syncs main thread and channels */
191 QemuSemaphore sem_sync;
Fabiano Rosasd117ed02024-02-29 12:30:09 -0300192 /* sem where to wait for more work */
193 QemuSemaphore sem;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200194
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100195 /* this mutex protects the following parameters */
196 QemuMutex mutex;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100197 /* should this thread finish */
198 bool quit;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100199 /* multifd flags for each packet */
200 uint32_t flags;
201 /* global number of generated multifd packets */
202 uint64_t packet_num;
Fabiano Rosasd117ed02024-02-29 12:30:09 -0300203 int pending_job;
204 MultiFDRecvData *data;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200205
206 /* thread local variables. No locking required */
207
208 /* pointer to the packet */
209 MultiFDPacket_t *packet;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100210 /* size of the next packet that contains pages */
211 uint32_t next_packet_size;
Peter Xu05b7ec12024-02-02 18:28:43 +0800212 /* packets received through this channel */
213 uint64_t packets_recved;
Lukas Straub5d1d1fc2023-05-08 21:11:07 +0200214 /* ramblock */
215 RAMBlock *block;
Juan Quintela4a8f19c2022-05-31 12:43:06 +0200216 /* ramblock host address */
217 uint8_t *host;
Juan Quintela226468b2021-11-19 12:06:05 +0100218 /* buffers to recv */
219 struct iovec *iov;
Juan Quintelacf2d4aa2021-11-22 13:41:06 +0100220 /* Pages that are not zero */
221 ram_addr_t *normal;
222 /* num of non zero pages */
223 uint32_t normal_num;
Hao Xiang303e6f52024-03-11 18:00:12 +0000224 /* Pages that are zero */
225 ram_addr_t *zero;
226 /* num of zero pages */
227 uint32_t zero_num;
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200228 /* used for de-compression methods */
Fabiano Rosas402dd7a2024-02-29 12:30:06 -0300229 void *compress_data;
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100230} MultiFDRecvParams;
231
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200232typedef struct {
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300233 /*
234 * The send_setup, send_cleanup, send_prepare are only called on
235 * the QEMU instance at the migration source.
236 */
237
238 /*
239 * Setup for sending side. Called once per channel during channel
240 * setup phase.
241 *
242 * Must allocate p->iov. If packets are in use (default), one
243 * extra iovec must be allocated for the packet header. Any memory
244 * allocated in this hook must be released at send_cleanup.
245 *
246 * p->write_flags may be used for passing flags to the QIOChannel.
247 *
248 * p->compression_data may be used by compression methods to store
249 * compression data.
250 */
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200251 int (*send_setup)(MultiFDSendParams *p, Error **errp);
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300252
253 /*
254 * Cleanup for sending side. Called once per channel during
255 * channel cleanup phase.
256 */
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200257 void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300258
259 /*
260 * Prepare the send packet. Called as a result of multifd_send()
261 * on the client side, with p pointing to the MultiFDSendParams of
262 * a channel that is currently idle.
263 *
264 * Must populate p->iov with the data to be sent, increment
265 * p->iovs_num to match the amount of iovecs used and set
266 * p->next_packet_size with the amount of data currently present
267 * in p->iov.
268 *
269 * Must indicate whether this is a compression packet by setting
270 * p->flags.
271 *
272 * As a last step, if packets are in use (default), must prepare
273 * the packet by calling multifd_send_fill_packet().
274 */
Juan Quintela02fb8102021-11-22 12:08:08 +0100275 int (*send_prepare)(MultiFDSendParams *p, Error **errp);
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300276
277 /*
278 * The recv_setup, recv_cleanup, recv are only called on the QEMU
279 * instance at the migration destination.
280 */
281
282 /*
283 * Setup for receiving side. Called once per channel during
284 * channel setup phase. May be empty.
285 *
286 * May allocate data structures for the receiving of data. May use
287 * p->iov. Compression methods may use p->compress_data.
288 */
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200289 int (*recv_setup)(MultiFDRecvParams *p, Error **errp);
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300290
291 /*
292 * Cleanup for receiving side. Called once per channel during
293 * channel cleanup phase. May be empty.
294 */
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200295 void (*recv_cleanup)(MultiFDRecvParams *p);
Fabiano Rosas62e1af12024-08-28 11:56:50 -0300296
297 /*
298 * Data receive method. Called as a result of multifd_recv() on
299 * the client side, with p pointing to the MultiFDRecvParams of a
300 * channel that is currently idle. Only called if there is data
301 * available to receive.
302 *
303 * Must validate p->flags according to what was set at
304 * send_prepare.
305 *
306 * Must read the data from the QIOChannel p->c.
307 */
Fabiano Rosas9db19122024-02-29 12:30:07 -0300308 int (*recv)(MultiFDRecvParams *p, Error **errp);
Juan Quintelaab7cbb02019-05-15 13:37:46 +0200309} MultiFDMethods;
310
Fabiano Rosas308d1652024-08-27 14:46:04 -0300311void multifd_register_ops(int method, const MultiFDMethods *ops);
Peter Xu25a1f872024-02-02 18:28:47 +0800312void multifd_send_fill_packet(MultiFDSendParams *p);
Hao Xiang303e6f52024-03-11 18:00:12 +0000313bool multifd_send_prepare_common(MultiFDSendParams *p);
314void multifd_send_zero_page_detect(MultiFDSendParams *p);
315void multifd_recv_zero_page_process(MultiFDRecvParams *p);
Juan Quintela7ec2c2b2019-01-04 15:30:06 +0100316
Peter Xu452b2052024-02-02 18:28:46 +0800317static inline void multifd_send_prepare_header(MultiFDSendParams *p)
318{
319 p->iov[0].iov_len = p->packet_len;
320 p->iov[0].iov_base = p->packet;
321 p->iovs_num++;
322}
323
Fabiano Rosasb7b03eb2024-02-29 12:30:11 -0300324void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
Fabiano Rosas40c94712024-08-27 14:46:03 -0300325bool multifd_send(MultiFDSendData **send_data);
326MultiFDSendData *multifd_send_data_alloc(void);
Peter Xu452b2052024-02-02 18:28:46 +0800327
/* Page size used for multifd RAM payloads: the target page size. */
static inline uint32_t multifd_ram_page_size(void)
{
    return qemu_target_page_size();
}
332
333static inline uint32_t multifd_ram_page_count(void)
334{
335 return MULTIFD_PACKET_SIZE / qemu_target_page_size();
336}
Fabiano Rosasa71ef5c2024-08-27 14:45:59 -0300337
338void multifd_ram_save_setup(void);
339void multifd_ram_save_cleanup(void);
Fabiano Rosasa0c78d82024-08-27 14:46:00 -0300340int multifd_ram_flush_and_sync(void);
Fabiano Rosas40c94712024-08-27 14:46:03 -0300341size_t multifd_ram_payload_size(void);
342void multifd_ram_fill_packet(MultiFDSendParams *p);
343int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
Juan Quintelad32ca5a2020-01-22 16:16:07 +0100344#endif