Emilio G. Cota | 515864a | 2016-06-08 14:55:30 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> |
| 3 | * |
| 4 | * License: GNU GPL, version 2 or later. |
| 5 | * See the COPYING file in the top-level directory. |
| 6 | */ |
| 7 | #include "qemu/osdep.h" |
Emilio G. Cota | 515864a | 2016-06-08 14:55:30 -0400 | [diff] [blame] | 8 | #include "qemu/processor.h" |
| 9 | #include "qemu/atomic.h" |
| 10 | #include "qemu/qht.h" |
| 11 | #include "qemu/rcu.h" |
| 12 | #include "exec/tb-hash-xx.h" |
| 13 | |
/*
 * Per-thread operation counters.
 *
 * Every kind of operation has two counters: one for attempts that
 * succeeded and a "not_" twin for attempts that did not.
 */
struct thread_stats {
    size_t rd, not_rd;  /* lookups: key found / key not found */
    size_t in, not_in;  /* insertions: performed / not performed */
    size_t rm, not_rm;  /* removals: performed / not performed */
    size_t rz, not_rz;  /* resizes: performed / not performed */
};
| 24 | |
| 25 | struct thread_info { |
| 26 | void (*func)(struct thread_info *); |
| 27 | struct thread_stats stats; |
| 28 | uint64_t r; |
| 29 | bool write_op; /* writes alternate between insertions and removals */ |
| 30 | bool resize_down; |
| 31 | } QEMU_ALIGNED(64); /* avoid false sharing among threads */ |
| 32 | |
| 33 | static struct qht ht; |
| 34 | static QemuThread *rw_threads; |
| 35 | |
| 36 | #define DEFAULT_RANGE (4096) |
| 37 | #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE |
| 38 | |
| 39 | static unsigned int duration = 1; |
| 40 | static unsigned int n_rw_threads = 1; |
| 41 | static unsigned long lookup_range = DEFAULT_RANGE; |
| 42 | static unsigned long update_range = DEFAULT_RANGE; |
| 43 | static size_t init_range = DEFAULT_RANGE; |
| 44 | static size_t init_size = DEFAULT_RANGE; |
| 45 | static size_t n_ready_threads; |
| 46 | static long populate_offset; |
| 47 | static long *keys; |
| 48 | |
| 49 | static size_t resize_min; |
| 50 | static size_t resize_max; |
| 51 | static struct thread_info *rz_info; |
| 52 | static unsigned long resize_delay = 1000; |
| 53 | static double resize_rate; /* 0.0 to 1.0 */ |
| 54 | static unsigned int n_rz_threads = 1; |
| 55 | static QemuThread *rz_threads; |
| 56 | |
| 57 | static double update_rate; /* 0.0 to 1.0 */ |
| 58 | static uint64_t update_threshold; |
| 59 | static uint64_t resize_threshold; |
| 60 | |
| 61 | static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; |
| 62 | static int qht_mode; |
| 63 | |
| 64 | static bool test_start; |
| 65 | static bool test_stop; |
| 66 | |
| 67 | static struct thread_info *rw_info; |
| 68 | |
/* Option summary printed by usage_complete(); one line per option. */
static const char commands_string[] =
    " -d = duration, in seconds\n"
    " -n = number of threads\n"
    "\n"
    " -o = offset at which keys start\n"
    "\n"
    " -g = set -s,-k,-K,-l,-r to the same value\n"
    " -s = initial size hint\n"
    " -k = initial number of keys\n"
    " -K = initial range of keys (will be rounded up to pow2)\n"
    " -l = lookup range of keys (will be rounded up to pow2)\n"
    " -r = update range of keys (will be rounded up to pow2)\n"
    "\n"
    " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n"
    "\n"
    " -R = enable auto-resize\n"
    " -S = resize rate (0.0 to 100.0)\n"
    " -D = delay (in us) between potential resizes\n"
    " -N = number of resize threads";
| 88 | |
| 89 | static void usage_complete(int argc, char *argv[]) |
| 90 | { |
| 91 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); |
| 92 | fprintf(stderr, "options:\n%s\n", commands_string); |
| 93 | exit(-1); |
| 94 | } |
| 95 | |
/*
 * Comparison callback handed to qht_lookup(): both arguments point to a
 * long key, and two entries match when those keys hold the same value.
 */
static bool is_equal(const void *obj, const void *userp)
{
    long stored = *(const long *)obj;
    long wanted = *(const long *)userp;

    return stored == wanted;
}
| 103 | |
/* Hash a key value: tb_hash_func7() with @v first and the rest zero. */
static inline uint32_t h(unsigned long v)
{
    uint32_t hash = tb_hash_func7(v, 0, 0, 0, 0);

    return hash;
}
| 108 | |
/*
 * One step of the xorshift64* pseudo-random generator.
 * From: https://en.wikipedia.org/wiki/Xorshift
 *
 * Chosen over rand_r() because it is faster and yields a wider range
 * (RAND_MAX is only guaranteed to be >= INT_MAX).
 *
 * Returns the value derived from state @x.  Note that an input of 0
 * always produces 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t a = x ^ (x >> 12);
    uint64_t b = a ^ (a << 25);
    uint64_t c = b ^ (b >> 27);

    return c * multiplier;
}
| 121 | |
| 122 | static void do_rz(struct thread_info *info) |
| 123 | { |
| 124 | struct thread_stats *stats = &info->stats; |
| 125 | |
| 126 | if (info->r < resize_threshold) { |
| 127 | size_t size = info->resize_down ? resize_min : resize_max; |
| 128 | bool resized; |
| 129 | |
| 130 | resized = qht_resize(&ht, size); |
| 131 | info->resize_down = !info->resize_down; |
| 132 | |
| 133 | if (resized) { |
| 134 | stats->rz++; |
| 135 | } else { |
| 136 | stats->not_rz++; |
| 137 | } |
| 138 | } |
| 139 | g_usleep(resize_delay); |
| 140 | } |
| 141 | |
| 142 | static void do_rw(struct thread_info *info) |
| 143 | { |
| 144 | struct thread_stats *stats = &info->stats; |
| 145 | uint32_t hash; |
| 146 | long *p; |
| 147 | |
| 148 | if (info->r >= update_threshold) { |
| 149 | bool read; |
| 150 | |
| 151 | p = &keys[info->r & (lookup_range - 1)]; |
| 152 | hash = h(*p); |
| 153 | read = qht_lookup(&ht, is_equal, p, hash); |
| 154 | if (read) { |
| 155 | stats->rd++; |
| 156 | } else { |
| 157 | stats->not_rd++; |
| 158 | } |
| 159 | } else { |
| 160 | p = &keys[info->r & (update_range - 1)]; |
| 161 | hash = h(*p); |
| 162 | if (info->write_op) { |
| 163 | bool written = false; |
| 164 | |
| 165 | if (qht_lookup(&ht, is_equal, p, hash) == NULL) { |
| 166 | written = qht_insert(&ht, p, hash); |
| 167 | } |
| 168 | if (written) { |
| 169 | stats->in++; |
| 170 | } else { |
| 171 | stats->not_in++; |
| 172 | } |
| 173 | } else { |
| 174 | bool removed = false; |
| 175 | |
| 176 | if (qht_lookup(&ht, is_equal, p, hash)) { |
| 177 | removed = qht_remove(&ht, p, hash); |
| 178 | } |
| 179 | if (removed) { |
| 180 | stats->rm++; |
| 181 | } else { |
| 182 | stats->not_rm++; |
| 183 | } |
| 184 | } |
| 185 | info->write_op = !info->write_op; |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | static void *thread_func(void *p) |
| 190 | { |
| 191 | struct thread_info *info = p; |
| 192 | |
| 193 | rcu_register_thread(); |
| 194 | |
| 195 | atomic_inc(&n_ready_threads); |
Emilio G. Cota | 977ec47 | 2016-10-14 11:54:51 +0200 | [diff] [blame] | 196 | while (!atomic_read(&test_start)) { |
Emilio G. Cota | 515864a | 2016-06-08 14:55:30 -0400 | [diff] [blame] | 197 | cpu_relax(); |
| 198 | } |
| 199 | |
| 200 | rcu_read_lock(); |
| 201 | while (!atomic_read(&test_stop)) { |
| 202 | info->r = xorshift64star(info->r); |
| 203 | info->func(info); |
| 204 | } |
| 205 | rcu_read_unlock(); |
| 206 | |
| 207 | rcu_unregister_thread(); |
| 208 | return NULL; |
| 209 | } |
| 210 | |
| 211 | /* sets everything except info->func */ |
| 212 | static void prepare_thread_info(struct thread_info *info, int i) |
| 213 | { |
| 214 | /* seed for the RNG; each thread should have a different one */ |
| 215 | info->r = (i + 1) ^ time(NULL); |
| 216 | /* the first update will be a write */ |
| 217 | info->write_op = true; |
| 218 | /* the first resize will be down */ |
| 219 | info->resize_down = true; |
| 220 | |
| 221 | memset(&info->stats, 0, sizeof(info->stats)); |
| 222 | } |
| 223 | |
| 224 | static void |
| 225 | th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, |
| 226 | void (*func)(struct thread_info *), int offset, int n) |
| 227 | { |
| 228 | struct thread_info *info; |
| 229 | QemuThread *th; |
| 230 | int i; |
| 231 | |
| 232 | th = g_malloc(sizeof(*th) * n); |
| 233 | *threads = th; |
| 234 | |
| 235 | info = qemu_memalign(64, sizeof(*info) * n); |
| 236 | *infos = info; |
| 237 | |
| 238 | for (i = 0; i < n; i++) { |
| 239 | prepare_thread_info(&info[i], offset + i); |
| 240 | info[i].func = func; |
| 241 | qemu_thread_create(&th[i], name, thread_func, &info[i], |
| 242 | QEMU_THREAD_JOINABLE); |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | static void create_threads(void) |
| 247 | { |
| 248 | th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); |
| 249 | th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); |
| 250 | } |
| 251 | |
| 252 | static void pr_params(void) |
| 253 | { |
| 254 | printf("Parameters:\n"); |
| 255 | printf(" duration: %d s\n", duration); |
| 256 | printf(" # of threads: %u\n", n_rw_threads); |
| 257 | printf(" initial # of keys: %zu\n", init_size); |
| 258 | printf(" initial size hint: %zu\n", qht_n_elems); |
| 259 | printf(" auto-resize: %s\n", |
| 260 | qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); |
| 261 | if (resize_rate) { |
| 262 | printf(" resize_rate: %f%%\n", resize_rate * 100.0); |
| 263 | printf(" resize range: %zu-%zu\n", resize_min, resize_max); |
| 264 | printf(" # resize threads %u\n", n_rz_threads); |
| 265 | } |
| 266 | printf(" update rate: %f%%\n", update_rate * 100.0); |
| 267 | printf(" offset: %ld\n", populate_offset); |
| 268 | printf(" initial key range: %zu\n", init_range); |
| 269 | printf(" lookup range: %lu\n", lookup_range); |
| 270 | printf(" update range: %lu\n", update_range); |
| 271 | } |
| 272 | |
/*
 * Convert a probability into a threshold on the uint64_t range, so that a
 * uniformly distributed 64-bit value falls below *threshold with
 * (approximately) probability @rate.
 *
 * @rate:      probability, nominally in [0.0, 1.0]
 * @threshold: where the result is stored
 *
 * Out-of-range input is clamped: anything that is not greater than 0.0
 * (negative values and NaN) yields 0, and anything >= 1.0 yields
 * UINT64_MAX.  Without the clamping, converting the out-of-range product
 * to uint64_t would be undefined behaviour.
 *
 * The scale factor is 2^64: UINT64_MAX itself is not representable as a
 * double and would be rounded to 2^64 anyway, so spell that out.  For
 * rate < 1.0 the product is strictly below 2^64 and the conversion is
 * well defined.
 */
static void do_threshold(double rate, uint64_t *threshold)
{
    if (!(rate > 0.0)) {
        *threshold = 0;
    } else if (rate >= 1.0) {
        *threshold = UINT64_MAX;
    } else {
        *threshold = (uint64_t)(rate * 0x1p64);
    }
}
| 281 | |
| 282 | static void htable_init(void) |
| 283 | { |
| 284 | unsigned long n = MAX(init_range, update_range); |
| 285 | uint64_t r = time(NULL); |
| 286 | size_t retries = 0; |
| 287 | size_t i; |
| 288 | |
| 289 | /* avoid allocating memory later by allocating all the keys now */ |
| 290 | keys = g_malloc(sizeof(*keys) * n); |
| 291 | for (i = 0; i < n; i++) { |
| 292 | keys[i] = populate_offset + i; |
| 293 | } |
| 294 | |
| 295 | /* some sanity checks */ |
| 296 | g_assert_cmpuint(lookup_range, <=, n); |
| 297 | |
| 298 | /* compute thresholds */ |
| 299 | do_threshold(update_rate, &update_threshold); |
| 300 | do_threshold(resize_rate, &resize_threshold); |
| 301 | |
| 302 | if (resize_rate) { |
| 303 | resize_min = n / 2; |
| 304 | resize_max = n; |
| 305 | assert(resize_min < resize_max); |
| 306 | } else { |
| 307 | n_rz_threads = 0; |
| 308 | } |
| 309 | |
| 310 | /* initialize the hash table */ |
| 311 | qht_init(&ht, qht_n_elems, qht_mode); |
| 312 | assert(init_size <= init_range); |
| 313 | |
| 314 | pr_params(); |
| 315 | |
| 316 | fprintf(stderr, "Initialization: populating %zu items...", init_size); |
| 317 | for (i = 0; i < init_size; i++) { |
| 318 | for (;;) { |
| 319 | uint32_t hash; |
| 320 | long *p; |
| 321 | |
| 322 | r = xorshift64star(r); |
| 323 | p = &keys[r & (init_range - 1)]; |
| 324 | hash = h(*p); |
| 325 | if (qht_insert(&ht, p, hash)) { |
| 326 | break; |
| 327 | } |
| 328 | retries++; |
| 329 | } |
| 330 | } |
| 331 | fprintf(stderr, " populated after %zu retries\n", retries); |
| 332 | } |
| 333 | |
| 334 | static void add_stats(struct thread_stats *s, struct thread_info *info, int n) |
| 335 | { |
| 336 | int i; |
| 337 | |
| 338 | for (i = 0; i < n; i++) { |
| 339 | struct thread_stats *stats = &info[i].stats; |
| 340 | |
| 341 | s->rd += stats->rd; |
| 342 | s->not_rd += stats->not_rd; |
| 343 | |
| 344 | s->in += stats->in; |
| 345 | s->not_in += stats->not_in; |
| 346 | |
| 347 | s->rm += stats->rm; |
| 348 | s->not_rm += stats->not_rm; |
| 349 | |
| 350 | s->rz += stats->rz; |
| 351 | s->not_rz += stats->not_rz; |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | static void pr_stats(void) |
| 356 | { |
| 357 | struct thread_stats s = {}; |
| 358 | double tx; |
| 359 | |
| 360 | add_stats(&s, rw_info, n_rw_threads); |
| 361 | add_stats(&s, rz_info, n_rz_threads); |
| 362 | |
| 363 | printf("Results:\n"); |
| 364 | |
| 365 | if (resize_rate) { |
| 366 | printf(" Resizes: %zu (%.2f%% of %zu)\n", |
| 367 | s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); |
| 368 | } |
| 369 | |
| 370 | printf(" Read: %.2f M (%.2f%% of %.2fM)\n", |
| 371 | (double)s.rd / 1e6, |
| 372 | (double)s.rd / (s.rd + s.not_rd) * 100, |
| 373 | (double)(s.rd + s.not_rd) / 1e6); |
| 374 | printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", |
| 375 | (double)s.in / 1e6, |
| 376 | (double)s.in / (s.in + s.not_in) * 100, |
| 377 | (double)(s.in + s.not_in) / 1e6); |
| 378 | printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", |
| 379 | (double)s.rm / 1e6, |
| 380 | (double)s.rm / (s.rm + s.not_rm) * 100, |
| 381 | (double)(s.rm + s.not_rm) / 1e6); |
| 382 | |
| 383 | tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; |
| 384 | printf(" Throughput: %.2f MT/s\n", tx); |
| 385 | printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); |
| 386 | } |
| 387 | |
| 388 | static void run_test(void) |
| 389 | { |
| 390 | unsigned int remaining; |
| 391 | int i; |
| 392 | |
| 393 | while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { |
| 394 | cpu_relax(); |
| 395 | } |
Emilio G. Cota | 977ec47 | 2016-10-14 11:54:51 +0200 | [diff] [blame] | 396 | atomic_set(&test_start, true); |
Emilio G. Cota | 515864a | 2016-06-08 14:55:30 -0400 | [diff] [blame] | 397 | do { |
| 398 | remaining = sleep(duration); |
| 399 | } while (remaining); |
Emilio G. Cota | 977ec47 | 2016-10-14 11:54:51 +0200 | [diff] [blame] | 400 | atomic_set(&test_stop, true); |
Emilio G. Cota | 515864a | 2016-06-08 14:55:30 -0400 | [diff] [blame] | 401 | |
| 402 | for (i = 0; i < n_rw_threads; i++) { |
| 403 | qemu_thread_join(&rw_threads[i]); |
| 404 | } |
| 405 | for (i = 0; i < n_rz_threads; i++) { |
| 406 | qemu_thread_join(&rz_threads[i]); |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | static void parse_args(int argc, char *argv[]) |
| 411 | { |
| 412 | int c; |
| 413 | |
| 414 | for (;;) { |
| 415 | c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:r:Rs:S:u:"); |
| 416 | if (c < 0) { |
| 417 | break; |
| 418 | } |
| 419 | switch (c) { |
| 420 | case 'd': |
| 421 | duration = atoi(optarg); |
| 422 | break; |
| 423 | case 'D': |
| 424 | resize_delay = atol(optarg); |
| 425 | break; |
| 426 | case 'g': |
| 427 | init_range = pow2ceil(atol(optarg)); |
| 428 | lookup_range = pow2ceil(atol(optarg)); |
| 429 | update_range = pow2ceil(atol(optarg)); |
| 430 | qht_n_elems = atol(optarg); |
| 431 | init_size = atol(optarg); |
| 432 | break; |
| 433 | case 'h': |
| 434 | usage_complete(argc, argv); |
| 435 | exit(0); |
| 436 | case 'k': |
| 437 | init_size = atol(optarg); |
| 438 | break; |
| 439 | case 'K': |
| 440 | init_range = pow2ceil(atol(optarg)); |
| 441 | break; |
| 442 | case 'l': |
| 443 | lookup_range = pow2ceil(atol(optarg)); |
| 444 | break; |
| 445 | case 'n': |
| 446 | n_rw_threads = atoi(optarg); |
| 447 | break; |
| 448 | case 'N': |
| 449 | n_rz_threads = atoi(optarg); |
| 450 | break; |
| 451 | case 'o': |
| 452 | populate_offset = atol(optarg); |
| 453 | break; |
| 454 | case 'r': |
| 455 | update_range = pow2ceil(atol(optarg)); |
| 456 | break; |
| 457 | case 'R': |
| 458 | qht_mode |= QHT_MODE_AUTO_RESIZE; |
| 459 | break; |
| 460 | case 's': |
| 461 | qht_n_elems = atol(optarg); |
| 462 | break; |
| 463 | case 'S': |
| 464 | resize_rate = atof(optarg) / 100.0; |
| 465 | if (resize_rate > 1.0) { |
| 466 | resize_rate = 1.0; |
| 467 | } |
| 468 | break; |
| 469 | case 'u': |
| 470 | update_rate = atof(optarg) / 100.0; |
| 471 | if (update_rate > 1.0) { |
| 472 | update_rate = 1.0; |
| 473 | } |
| 474 | break; |
| 475 | } |
| 476 | } |
| 477 | } |
| 478 | |
/*
 * Benchmark driver: read the options, build and populate the table, start
 * the worker threads, run the timed test and print the results.
 */
int main(int argc, char *argv[])
{
    /* configuration */
    parse_args(argc, argv);

    /* setup */
    htable_init();
    create_threads();

    /* measurement and report */
    run_test();
    pr_stats();

    return 0;
}