migration: search for zero instead of dup pages
virtually all dup pages are zero pages. remove
the special is_dup_page() function and use the
optimized buffer_find_nonzero_offset() function
instead.
here buffer_find_nonzero_offset() is used directly
to avoid the unnecssary additional checks in
buffer_is_zero().
raw performace gain checking 1 GByte zeroed memory
over is_dup_page() is approx. 10-12% with SSE2
and 8-10% with unsigned long arithmedtic.
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
diff --git a/arch_init.c b/arch_init.c
index 35974c2..dd5deff 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -146,19 +146,10 @@
return 0;
}
-static int is_dup_page(uint8_t *page)
+static inline bool is_zero_page(uint8_t *p)
{
- VECTYPE *p = (VECTYPE *)page;
- VECTYPE val = SPLAT(page);
- int i;
-
- for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
- if (!ALL_EQ(val, p[i])) {
- return 0;
- }
- }
-
- return 1;
+ return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
+ TARGET_PAGE_SIZE;
}
/* struct contains XBZRLE cache and a static page
@@ -445,12 +436,12 @@
/* In doubt sent page as normal */
bytes_sent = -1;
- if (is_dup_page(p)) {
+ if (is_zero_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
- qemu_put_byte(f, *p);
- bytes_sent += 1;
+ qemu_put_byte(f, 0);
+ bytes_sent++;
} else if (migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,