| /* | 
 |  * Xor Based Zero Run Length Encoding | 
 |  * | 
 |  * Copyright 2013 Red Hat, Inc. and/or its affiliates | 
 |  * | 
 |  * Authors: | 
 |  *  Orit Wasserman  <owasserm@redhat.com> | 
 |  * | 
 |  * This work is licensed under the terms of the GNU GPL, version 2 or later. | 
 |  * See the COPYING file in the top-level directory. | 
 |  * | 
 |  */ | 
 | #include "qemu/osdep.h" | 
 | #include "qemu/cutils.h" | 
 | #include "qemu/host-utils.h" | 
 | #include "xbzrle.h" | 
 |  | 
 | #if defined(CONFIG_AVX512BW_OPT) | 
 | #include <immintrin.h> | 
 | #include "host/cpuinfo.h" | 
 |  | 
 | static int __attribute__((target("avx512bw"))) | 
 | xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | 
 |                             uint8_t *dst, int dlen) | 
 | { | 
 |     uint32_t zrun_len = 0, nzrun_len = 0; | 
 |     int d = 0, i = 0, num = 0; | 
 |     uint8_t *nzrun_start = NULL; | 
 |     /* add 1 to include residual part in main loop */ | 
 |     uint32_t count512s = (slen >> 6) + 1; | 
 |     /* countResidual is tail of data, i.e., countResidual = slen % 64 */ | 
 |     uint32_t count_residual = slen & 0b111111; | 
 |     bool never_same = true; | 
 |     uint64_t mask_residual = 1; | 
 |     mask_residual <<= count_residual; | 
 |     mask_residual -= 1; | 
 |     __m512i r = _mm512_set1_epi32(0); | 
 |  | 
 |     while (count512s) { | 
 |         int bytes_to_check = 64; | 
 |         uint64_t mask = 0xffffffffffffffff; | 
 |         if (count512s == 1) { | 
 |             bytes_to_check = count_residual; | 
 |             mask = mask_residual; | 
 |         } | 
 |         __m512i old_data = _mm512_mask_loadu_epi8(r, | 
 |                                                   mask, old_buf + i); | 
 |         __m512i new_data = _mm512_mask_loadu_epi8(r, | 
 |                                                   mask, new_buf + i); | 
 |         uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); | 
 |         count512s--; | 
 |  | 
 |         bool is_same = (comp & 0x1); | 
 |         while (bytes_to_check) { | 
 |             if (d + 2 > dlen) { | 
 |                 return -1; | 
 |             } | 
 |             if (is_same) { | 
 |                 if (nzrun_len) { | 
 |                     d += uleb128_encode_small(dst + d, nzrun_len); | 
 |                     if (d + nzrun_len > dlen) { | 
 |                         return -1; | 
 |                     } | 
 |                     nzrun_start = new_buf + i - nzrun_len; | 
 |                     memcpy(dst + d, nzrun_start, nzrun_len); | 
 |                     d += nzrun_len; | 
 |                     nzrun_len = 0; | 
 |                 } | 
 |                 /* 64 data at a time for speed */ | 
 |                 if (count512s && (comp == 0xffffffffffffffff)) { | 
 |                     i += 64; | 
 |                     zrun_len += 64; | 
 |                     break; | 
 |                 } | 
 |                 never_same = false; | 
 |                 num = ctz64(~comp); | 
 |                 num = (num < bytes_to_check) ? num : bytes_to_check; | 
 |                 zrun_len += num; | 
 |                 bytes_to_check -= num; | 
 |                 comp >>= num; | 
 |                 i += num; | 
 |                 if (bytes_to_check) { | 
 |                     /* still has different data after same data */ | 
 |                     d += uleb128_encode_small(dst + d, zrun_len); | 
 |                     zrun_len = 0; | 
 |                 } else { | 
 |                     break; | 
 |                 } | 
 |             } | 
 |             if (never_same || zrun_len) { | 
 |                 /* | 
 |                  * never_same only acts if | 
 |                  * data begins with diff in first count512s | 
 |                  */ | 
 |                 d += uleb128_encode_small(dst + d, zrun_len); | 
 |                 zrun_len = 0; | 
 |                 never_same = false; | 
 |             } | 
 |             /* has diff, 64 data at a time for speed */ | 
 |             if ((bytes_to_check == 64) && (comp == 0x0)) { | 
 |                 i += 64; | 
 |                 nzrun_len += 64; | 
 |                 break; | 
 |             } | 
 |             num = ctz64(comp); | 
 |             num = (num < bytes_to_check) ? num : bytes_to_check; | 
 |             nzrun_len += num; | 
 |             bytes_to_check -= num; | 
 |             comp >>= num; | 
 |             i += num; | 
 |             if (bytes_to_check) { | 
 |                 /* mask like 111000 */ | 
 |                 d += uleb128_encode_small(dst + d, nzrun_len); | 
 |                 /* overflow */ | 
 |                 if (d + nzrun_len > dlen) { | 
 |                     return -1; | 
 |                 } | 
 |                 nzrun_start = new_buf + i - nzrun_len; | 
 |                 memcpy(dst + d, nzrun_start, nzrun_len); | 
 |                 d += nzrun_len; | 
 |                 nzrun_len = 0; | 
 |                 is_same = true; | 
 |             } | 
 |         } | 
 |     } | 
 |  | 
 |     if (nzrun_len != 0) { | 
 |         d += uleb128_encode_small(dst + d, nzrun_len); | 
 |         /* overflow */ | 
 |         if (d + nzrun_len > dlen) { | 
 |             return -1; | 
 |         } | 
 |         nzrun_start = new_buf + i - nzrun_len; | 
 |         memcpy(dst + d, nzrun_start, nzrun_len); | 
 |         d += nzrun_len; | 
 |     } | 
 |     return d; | 
 | } | 
 |  | 
 | static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf, | 
 |                                     int slen, uint8_t *dst, int dlen); | 
 |  | 
 | static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int); | 
 |  | 
 | static void __attribute__((constructor)) init_accel(void) | 
 | { | 
 |     unsigned info = cpuinfo_init(); | 
 |     if (info & CPUINFO_AVX512BW) { | 
 |         accel_func = xbzrle_encode_buffer_avx512; | 
 |     } else { | 
 |         accel_func = xbzrle_encode_buffer_int; | 
 |     } | 
 | } | 
 |  | 
 | int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, | 
 |                          uint8_t *dst, int dlen) | 
 | { | 
 |     return accel_func(old_buf, new_buf, slen, dst, dlen); | 
 | } | 
 |  | 
 | #define xbzrle_encode_buffer xbzrle_encode_buffer_int | 
 | #endif | 
 |  | 
 | /* | 
 |   page = zrun nzrun | 
 |        | zrun nzrun page | 
 |  | 
 |   zrun = length | 
 |  | 
 |   nzrun = length byte... | 
 |  | 
 |   length = uleb128 encoded integer | 
 |  */ | 
 | int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, | 
 |                          uint8_t *dst, int dlen) | 
 | { | 
 |     uint32_t zrun_len = 0, nzrun_len = 0; | 
 |     int d = 0, i = 0; | 
 |     long res; | 
 |     uint8_t *nzrun_start = NULL; | 
 |  | 
 |     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % | 
 |                sizeof(long))); | 
 |  | 
 |     while (i < slen) { | 
 |         /* overflow */ | 
 |         if (d + 2 > dlen) { | 
 |             return -1; | 
 |         } | 
 |  | 
 |         /* not aligned to sizeof(long) */ | 
 |         res = (slen - i) % sizeof(long); | 
 |         while (res && old_buf[i] == new_buf[i]) { | 
 |             zrun_len++; | 
 |             i++; | 
 |             res--; | 
 |         } | 
 |  | 
 |         /* word at a time for speed */ | 
 |         if (!res) { | 
 |             while (i < slen && | 
 |                    (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { | 
 |                 i += sizeof(long); | 
 |                 zrun_len += sizeof(long); | 
 |             } | 
 |  | 
 |             /* go over the rest */ | 
 |             while (i < slen && old_buf[i] == new_buf[i]) { | 
 |                 zrun_len++; | 
 |                 i++; | 
 |             } | 
 |         } | 
 |  | 
 |         /* buffer unchanged */ | 
 |         if (zrun_len == slen) { | 
 |             return 0; | 
 |         } | 
 |  | 
 |         /* skip last zero run */ | 
 |         if (i == slen) { | 
 |             return d; | 
 |         } | 
 |  | 
 |         d += uleb128_encode_small(dst + d, zrun_len); | 
 |  | 
 |         zrun_len = 0; | 
 |         nzrun_start = new_buf + i; | 
 |  | 
 |         /* overflow */ | 
 |         if (d + 2 > dlen) { | 
 |             return -1; | 
 |         } | 
 |         /* not aligned to sizeof(long) */ | 
 |         res = (slen - i) % sizeof(long); | 
 |         while (res && old_buf[i] != new_buf[i]) { | 
 |             i++; | 
 |             nzrun_len++; | 
 |             res--; | 
 |         } | 
 |  | 
 |         /* word at a time for speed, use of 32-bit long okay */ | 
 |         if (!res) { | 
 |             /* truncation to 32-bit long okay */ | 
 |             unsigned long mask = (unsigned long)0x0101010101010101ULL; | 
 |             while (i < slen) { | 
 |                 unsigned long xor; | 
 |                 xor = *(unsigned long *)(old_buf + i) | 
 |                     ^ *(unsigned long *)(new_buf + i); | 
 |                 if ((xor - mask) & ~xor & (mask << 7)) { | 
 |                     /* found the end of an nzrun within the current long */ | 
 |                     while (old_buf[i] != new_buf[i]) { | 
 |                         nzrun_len++; | 
 |                         i++; | 
 |                     } | 
 |                     break; | 
 |                 } else { | 
 |                     i += sizeof(long); | 
 |                     nzrun_len += sizeof(long); | 
 |                 } | 
 |             } | 
 |         } | 
 |  | 
 |         d += uleb128_encode_small(dst + d, nzrun_len); | 
 |         /* overflow */ | 
 |         if (d + nzrun_len > dlen) { | 
 |             return -1; | 
 |         } | 
 |         memcpy(dst + d, nzrun_start, nzrun_len); | 
 |         d += nzrun_len; | 
 |         nzrun_len = 0; | 
 |     } | 
 |  | 
 |     return d; | 
 | } | 
 |  | 
 | int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) | 
 | { | 
 |     int i = 0, d = 0; | 
 |     int ret; | 
 |     uint32_t count = 0; | 
 |  | 
 |     while (i < slen) { | 
 |  | 
 |         /* zrun */ | 
 |         if ((slen - i) < 2) { | 
 |             return -1; | 
 |         } | 
 |  | 
 |         ret = uleb128_decode_small(src + i, &count); | 
 |         if (ret < 0 || (i && !count)) { | 
 |             return -1; | 
 |         } | 
 |         i += ret; | 
 |         d += count; | 
 |  | 
 |         /* overflow */ | 
 |         if (d > dlen) { | 
 |             return -1; | 
 |         } | 
 |  | 
 |         /* nzrun */ | 
 |         if ((slen - i) < 2) { | 
 |             return -1; | 
 |         } | 
 |  | 
 |         ret = uleb128_decode_small(src + i, &count); | 
 |         if (ret < 0 || !count) { | 
 |             return -1; | 
 |         } | 
 |         i += ret; | 
 |  | 
 |         /* overflow */ | 
 |         if (d + count > dlen || i + count > slen) { | 
 |             return -1; | 
 |         } | 
 |  | 
 |         memcpy(dst + d, src + i, count); | 
 |         d += count; | 
 |         i += count; | 
 |     } | 
 |  | 
 |     return d; | 
 | } |