core/test/run-trace.c - skiboot - Git at Google

 // SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
 /*
  * Copyright 2013-2019 IBM Corp.
  */

 #include <config.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <sched.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <sys/types.h>
 #include <sys/wait.h>

 #include <skiboot-valgrind.h>

 /* Don't include these: PPC-specific */
 #define __CPU_H
 #define __TIME_H
 #define __PROCESSOR_H

 /*
  * Provide an lwsync() barrier for the architecture the test is built on.
  * The "memory" clobber in each asm statement also stops the compiler from
  * reordering memory accesses across the call.  Any other architecture is
  * rejected at compile time.
  */
 #if defined(__i386__) || defined(__x86_64__)
 /* This is more than a lwsync, but it'll work */
 static void full_barrier(void)
 {
 	asm volatile("mfence" : : : "memory");
 }
 /* On x86 the name lwsync simply resolves to the mfence-based barrier. */
 #define lwsync full_barrier
 #elif defined(__powerpc__) || defined(__powerpc64__)
 /* On PowerPC, issue the lwsync instruction itself. */
 static inline void lwsync(void)
 {
 	asm volatile("lwsync" : : : "memory");
 }
 #else
 #error "Define lwsync for this arch"
 #endif

 #define zalloc(size) calloc((size), 1)

 /*
  * Test-local definition of struct cpu_thread.  The file defines __CPU_H
  * earlier so the PPC-specific header is not pulled in; presumably this
  * carries only the fields the included sources need -- TODO confirm.
  */
 struct cpu_thread {
 	uint32_t pir;			/* main() sets this to the CPU index */
 	uint32_t chip_id;		/* never assigned in this file */
 	struct trace_info *trace;	/* trace buffer used by this CPU */
 	uint32_t server_no;		/* main() sets this to the CPU index */
 	bool is_secondary;		/* main() marks odd-numbered CPUs secondary */
 	struct cpu_thread *primary;	/* main() points this at the even-numbered sibling */
 };
 static struct cpu_thread *this_cpu(void);

 #define CPUS 4

 static struct cpu_thread fake_cpus[CPUS];

 static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu)
 {
 	if (cpu == NULL)
 		return &fake_cpus[0];
 	cpu++;
 	if (cpu == &fake_cpus[CPUS])
 		return NULL;
 	return cpu;
 }

 #define first_cpu() next_cpu(NULL)

 #define for_each_cpu(cpu)	\
 	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))

 /* Fake timebase value; the tests assign it before logging a record. */
 static unsigned long timestamp;

 /* Test replacement for the timebase read: report the fake value. */
 static unsigned long mftb(void)
 {
 	const unsigned long now = timestamp;

 	return now;
 }

 /*
  * Host replacement for the chip-local allocator.
  *
  * @chip_id: ignored; there is only one memory pool on the host.
  * @size:    bytes to allocate.
  * @align:   required alignment, as accepted by posix_memalign().
  * Returns the aligned allocation, or NULL if posix_memalign() fails.
  */
 static void *local_alloc(unsigned int chip_id,
 			 size_t size, size_t align)
 {
 	void *mem = NULL;

 	(void)chip_id;
 	return posix_memalign(&mem, align, size) == 0 ? mem : NULL;
 }

 struct dt_node;
 extern struct dt_node *opal_node;

 #include "../trace.c"

 #include "../external/trace/trace.c"
 static struct trace_reader trace_readers[CPUS];
 struct trace_reader *my_trace_reader;
 #include "../device.c"

 /* Dummy section-boundary symbols; presumably referenced by the included sources -- TODO confirm. */
 char __rodata_start[1], __rodata_end[1];
 /* Device-tree node; main() points it at a root node named "opal". */
 struct dt_node *opal_node;
 /* Every mask bit set; presumably enables all trace types -- TODO confirm against trace.c. */
 struct debug_descriptor debug_descriptor = {
 	.trace_mask = -1
 };

 /* Stub: whatever key is asked for, this harness reports no value. */
 const char *nvram_query_safe(const char *key __unused)
 {
 	const char *no_value = NULL;

 	return no_value;
 }

 /*
  * Stub lock acquire: asserts the lock is currently free, then marks it
  * held.  @caller is accepted for interface compatibility and ignored.
  */
 void lock_caller(struct lock *l, const char *caller)
 {
 	assert(l->lock_val == 0);
 	l->lock_val = 1;
 	(void)caller;
 }

 /* Stub lock release: asserts the lock is held, then marks it free. */
 void unlock(struct lock *l)
 {
 	assert(l->lock_val != 0);
 	l->lock_val = 0;
 }

 /* The fake CPU the current process is acting as. */
 struct cpu_thread *my_fake_cpu;

 /* Report whichever fake CPU has been selected via my_fake_cpu. */
 static struct cpu_thread *this_cpu(void)
 {
 	struct cpu_thread *self = my_fake_cpu;

 	return self;
 }

 #include <sys/mman.h>
 #define PER_CHILD_TRACES ((RUNNING_ON_VALGRIND) ? (1024*16) : (1024*1024))

 static void write_trace_entries(int id)
 {
 	void exit(int);
 	unsigned int i;
 	union trace trace;

 	timestamp = id;
 	for (i = 0; i < PER_CHILD_TRACES; i++) {
 		timestamp = i * CPUS + id;
 		assert(sizeof(trace.hdr) % 8 == 0);
 		/* First child never repeats, second repeats once, etc. */
 		trace_add(&trace, 3 + ((i / (id + 1)) % 0x40),
 			  sizeof(trace.hdr));
 	}

 	/* Final entry has special type, so parent knows it's over. */
 	trace_add(&trace, 0x70, sizeof(trace.hdr));
 	exit(0);
 }

 /* True only when every one of the CPUS entries in @done is set. */
 static bool all_done(const bool done[])
 {
 	const bool *flag = done;
 	const bool *end = done + CPUS;

 	while (flag != end) {
 		if (!*flag)
 			return false;
 		flag++;
 	}
 	return true;
 }

 /*
  * Stress the trace buffers with real concurrency: CPUS forked writers
  * fill per-CPU buffers that live in one shared anonymous mapping while
  * this process drains them and tallies what it reads.
  *
  * Each child logs PER_CHILD_TRACES records followed by a type-0x70
  * record that tells the reader the child has finished.  Setup failures
  * (mmap, fork), malformed records and abnormal child exits are all
  * reported through assert().
  */
 static void test_parallel(void)
 {
 	unsigned int counts[CPUS] = { 0 }, overflows[CPUS] = { 0 };
 	unsigned int repeats[CPUS] = { 0 };
 	bool done[CPUS] = { false };
 	size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace);
 	size_t pagesz = getpagesize();
 	size_t map_len;
 	unsigned int i, cpu;
 	unsigned int last = 0;
 	void *p;

 	/* Use a shared mmap to test actual parallel buffers. */
 	map_len = (CPUS * len + pagesz - 1) & ~(pagesz - 1);
 	p = mmap(NULL, map_len, PROT_READ|PROT_WRITE,
 		 MAP_ANONYMOUS|MAP_SHARED, -1, 0);
 	assert(p != MAP_FAILED);

 	for (i = 0; i < CPUS; i++) {
 		fake_cpus[i].trace = (void *)((char *)p + i * len);
 		fake_cpus[i].trace->tb.buf_size = cpu_to_be64(TBUF_SZ);
 		fake_cpus[i].trace->tb.max_size = cpu_to_be32(sizeof(union trace));
 		fake_cpus[i].is_secondary = false;
 		memset(&trace_readers[i], 0, sizeof(struct trace_reader));
 		trace_readers[i].tb = &fake_cpus[i].trace->tb;
 	}

 	for (i = 0; i < CPUS; i++) {
 		pid_t pid = fork();

 		/* A missing writer would leave the reader loop below spinning forever. */
 		assert(pid >= 0);
 		if (pid == 0) {
 			/* Child: never returns. */
 			my_fake_cpu = &fake_cpus[i];
 			write_trace_entries(i);
 		}
 	}

 	while (!all_done(done)) {
 		union trace t;

 		/* Poll the readers round-robin, starting with the last busy one. */
 		for (i = 0; i < CPUS; i++) {
 			if (trace_get(&t, &trace_readers[(i + last) % CPUS]))
 				break;
 		}

 		/* Nothing to read anywhere: let the writers run. */
 		if (i == CPUS) {
 			sched_yield();
 			continue;
 		}
 		i = (i + last) % CPUS;
 		last = i;

 		if (t.hdr.type == TRACE_OVERFLOW) {
 			/* Conveniently, each record is 16 bytes here. */
 			assert(be64_to_cpu(t.overflow.bytes_missed) % 16 == 0);
 			overflows[i] += be64_to_cpu(t.overflow.bytes_missed) / 16;
 			continue;
 		}

 		cpu = be16_to_cpu(t.hdr.cpu);
 		assert(cpu < CPUS);
 		assert(!done[cpu]);
 		/* Writers stamp records with n * CPUS + id. */
 		assert(be64_to_cpu(t.hdr.timestamp) % CPUS == cpu);
 		if (t.hdr.type == TRACE_REPEAT) {
 			assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat));
 			assert(be16_to_cpu(t.repeat.num) != 0);
 			/* Child N logs each type N+1 times in a row: at most N repeats. */
 			assert(be16_to_cpu(t.repeat.num) <= cpu);
 			repeats[cpu] += be16_to_cpu(t.repeat.num);
 		} else if (t.hdr.type == 0x70) {
 			done[cpu] = true;
 		} else {
 			counts[cpu]++;
 		}
 	}

 	/* Gather children, insisting that each one exited cleanly. */
 	for (i = 0; i < CPUS; i++) {
 		int status;
 		pid_t pid = wait(&status);

 		assert(pid > 0);
 		assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
 	}

 	for (i = 0; i < CPUS; i++) {
 		printf("Child %u: %u produced, %u overflows, %llu total\n", i,
 		       counts[i], overflows[i],
 		       (unsigned long long)be64_to_cpu(fake_cpus[i].trace->tb.end));
 		assert(counts[i] + repeats[i] <= PER_CHILD_TRACES);
 	}
 	/* Child 0 never repeats. */
 	assert(repeats[0] == 0);
 	assert(counts[0] + overflows[0] == PER_CHILD_TRACES);

 	/*
 	 * FIXME: Other children have some fuzz, since overflows may
 	 * include repeat record we already read.  And odd-numbered
 	 * overflows may include more repeat records than normal
 	 * records (they alternate).
 	 */

 	/* Release the shared mapping and drop the pointers into it. */
 	for (i = 0; i < CPUS; i++) {
 		trace_readers[i].tb = NULL;
 		fake_cpus[i].trace = NULL;
 	}
 	munmap(p, map_len);
 }

 /*
  * Single-process checks of the trace buffer, followed by the forked
  * stress test in test_parallel().
  *
  * In order: empty buffers, one record, wrap-around with an overflow
  * marker, large records, repeat coalescing, and repeats added while the
  * reader keeps up.  Every expectation is an assert(); returns 0 if all
  * of them hold.
  */
 int main(void)
 {
 	union trace minimal;
 	union trace large;
 	union trace trace;
 	unsigned int i, j;

 	/* Build an "opal" root with firmware/exports beneath it before initialising the buffers. */
 	opal_node = dt_new_root("opal");
 	dt_new(dt_new(opal_node, "firmware"), "exports");
 	/* Odd-numbered CPUs are secondaries of the even-numbered CPU below them. */
 	for (i = 0; i < CPUS; i++) {
 		fake_cpus[i].server_no = i;
 		fake_cpus[i].pir = i;
 		fake_cpus[i].is_secondary = (i & 0x1);
 		fake_cpus[i].primary = &fake_cpus[i & ~0x1];
 	}
 	/* All sequential tests write as CPU 0 and read through reader 0. */
 	my_fake_cpu = &fake_cpus[0];
 	my_trace_reader = &trace_readers[0];
 	init_trace_buffers();

 	/* Freshly initialised buffers must read as empty. */
 	for (i = 0; i < CPUS; i++) {
 		trace_readers[i].tb = &fake_cpus[i].trace->tb;
 		assert(trace_empty(&trace_readers[i]));
 		assert(!trace_get(&trace, &trace_readers[i]));
 	}

 	/* One header-only record in, the same record out. */
 	assert(sizeof(trace.hdr) % 8 == 0);
 	timestamp = 1;
 	trace_add(&minimal, 100, sizeof(trace.hdr));
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 	assert(be64_to_cpu(trace.hdr.timestamp) == timestamp);

 	/* Make it wrap once. */
 	/* Types alternate 99/100 so consecutive records are never identical. */
 	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) {
 		timestamp = i;
 		trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr));
 	}

 	assert(trace_get(&trace, my_trace_reader));
 	/* First one must be overflow marker. */
 	assert(trace.hdr.type == TRACE_OVERFLOW);
 	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow));
 	/* Exactly one record (the one written at timestamp 0) is expected to be lost. */
 	assert(be64_to_cpu(trace.overflow.bytes_missed) == minimal.hdr.len_div_8 * 8);

 	/* The survivors come back in order, starting at timestamp 1. */
 	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) {
 		assert(trace_get(&trace, my_trace_reader));
 		assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 		assert(be64_to_cpu(trace.hdr.timestamp) == i+1);
 		assert(trace.hdr.type == 99 + ((i+1)%2));
 	}
 	assert(!trace_get(&trace, my_trace_reader));

 	/* Now put in some weird-length ones, to test overlap.
 	 * Last power of 2, minus 8. */
 	/*
 	 * j ends as the smallest exponent with (1 << j) >= sizeof(large).
 	 * NOTE(review): the length actually passed is 1 << (j-1); the
 	 * "minus 8" in the comment above is not visible in this code.
 	 */
 	for (j = 0; (1 << j) < sizeof(large); j++);
 	for (i = 0; i < TBUF_SZ; i++) {
 		timestamp = i;
 		trace_add(&large, 100 + (i%2), (1 << (j-1)));
 	}
 	/* Expect an overflow marker, then records with consecutive timestamps. */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.type == TRACE_OVERFLOW);
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.len_div_8 == large.hdr.len_div_8);
 	i = be64_to_cpu(trace.hdr.timestamp);
 	while (trace_get(&trace, my_trace_reader))
 		assert(be64_to_cpu(trace.hdr.timestamp) == ++i);

 	/* Test repeats. */
 	/* 65538 identical type-100 records, then two identical type-101 records. */
 	for (i = 0; i < 65538; i++) {
 		timestamp = i;
 		trace_add(&minimal, 100, sizeof(trace.hdr));
 	}
 	timestamp = i;
 	trace_add(&minimal, 101, sizeof(trace.hdr));
 	timestamp = i+1;
 	trace_add(&minimal, 101, sizeof(trace.hdr));

 	/* Original type-100 record at timestamp 0 ... */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.timestamp == 0);
 	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 	assert(trace.hdr.type == 100);
 	/* ... followed by a repeat record whose count is expected to stop at 65535. */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.type == TRACE_REPEAT);
 	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
 	assert(be16_to_cpu(trace.repeat.num) == 65535);
 	assert(be64_to_cpu(trace.repeat.timestamp) == 65535);
 	/* The next identical record is expected to be stored in full again ... */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(be64_to_cpu(trace.hdr.timestamp) == 65536);
 	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 	assert(trace.hdr.type == 100);
 	/* ... with the last one folded into a new repeat record. */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.type == TRACE_REPEAT);
 	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
 	assert(be16_to_cpu(trace.repeat.num) == 1);
 	assert(be64_to_cpu(trace.repeat.timestamp) == 65537);

 	/* The type-101 pair: one full record plus a repeat of one. */
 	assert(trace_get(&trace, my_trace_reader));
 	assert(be64_to_cpu(trace.hdr.timestamp) == 65538);
 	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 	assert(trace.hdr.type == 101);
 	assert(trace_get(&trace, my_trace_reader));
 	assert(trace.hdr.type == TRACE_REPEAT);
 	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
 	assert(be16_to_cpu(trace.repeat.num) == 1);
 	assert(be64_to_cpu(trace.repeat.timestamp) == 65539);

 	/* Now, test adding repeat while we're reading... */
 	timestamp = 0;
 	trace_add(&minimal, 100, sizeof(trace.hdr));
 	assert(trace_get(&trace, my_trace_reader));
 	assert(be64_to_cpu(trace.hdr.timestamp) == 0);
 	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 	assert(trace.hdr.type == 100);

 	/*
 	 * The reader consumes each identical record as soon as it is added,
 	 * so every read is expected to yield a repeat of exactly one, except
 	 * when i is a multiple of 65536, where a full record is expected.
 	 */
 	for (i = 1; i < TBUF_SZ; i++) {
 		timestamp = i;
 		trace_add(&minimal, 100, sizeof(trace.hdr));
 		assert(trace_get(&trace, my_trace_reader));
 		if (i % 65536 == 0) {
 			assert(trace.hdr.type == 100);
 			assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
 		} else {
 			assert(trace.hdr.type == TRACE_REPEAT);
 			assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
 			assert(be16_to_cpu(trace.repeat.num) == 1);
 		}
 		/*
 		 * Checked through the repeat view for both record kinds;
 		 * presumably hdr and repeat keep the timestamp at the same
 		 * offset -- TODO confirm against the trace type definitions.
 		 */
 		assert(be64_to_cpu(trace.repeat.timestamp) == i);
 		assert(!trace_get(&trace, my_trace_reader));
 	}

 	/* Only non-secondary CPUs have their buffer freed here. */
 	for (i = 0; i < CPUS; i++)
 		if (!fake_cpus[i].is_secondary)
 			free(fake_cpus[i].trace);

 	test_parallel();

 	return 0;
 }
	// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
	/*
	* Copyright 2013-2019 IBM Corp.
	*/

	#include <config.h>
	#include <stdlib.h>
	#include <assert.h>
	#include <sched.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <stdio.h>
	#include <stdbool.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	#include <skiboot-valgrind.h>

	/* Don't include these: PPC-specific */
	#define __CPU_H
	#define __TIME_H
	#define __PROCESSOR_H

	#if defined(__i386__) \|\| defined(__x86_64__)
	/* This is more than a lwsync, but it'll work */
	static void full_barrier(void)
	{
	asm volatile("mfence" : : : "memory");
	}
	#define lwsync full_barrier
	#elif defined(__powerpc__) \|\| defined(__powerpc64__)
	static inline void lwsync(void)
	{
	asm volatile("lwsync" : : : "memory");
	}
	#else
	#error "Define lwsync for this arch"
	#endif

	#define zalloc(size) calloc((size), 1)

	/*
	 * Test-local definition of struct cpu_thread.  The file defines __CPU_H
	 * earlier so the PPC-specific header is not pulled in; presumably this
	 * carries only the fields the included sources need -- TODO confirm.
	 */
	struct cpu_thread {
	uint32_t pir;			/* main() sets this to the CPU index */
	uint32_t chip_id;		/* never assigned in this file */
	struct trace_info *trace;	/* trace buffer used by this CPU */
	uint32_t server_no;		/* main() sets this to the CPU index */
	bool is_secondary;		/* main() marks odd-numbered CPUs secondary */
	struct cpu_thread *primary;	/* main() points this at the even-numbered sibling */
	};
	static struct cpu_thread *this_cpu(void);

	#define CPUS 4

	static struct cpu_thread fake_cpus[CPUS];

	static inline struct cpu_thread next_cpu(struct cpu_thread cpu)
	{
	if (cpu == NULL)
	return &fake_cpus[0];
	cpu++;
	if (cpu == &fake_cpus[CPUS])
	return NULL;
	return cpu;
	}

	#define first_cpu() next_cpu(NULL)

	#define for_each_cpu(cpu) \
	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))

	/* Fake timebase value; the tests assign it before logging a record. */
	static unsigned long timestamp;

	/* Test replacement for the timebase read: report the fake value. */
	static unsigned long mftb(void)
	{
		const unsigned long now = timestamp;

		return now;
	}

	/*
	 * Host replacement for the chip-local allocator.
	 *
	 * @chip_id: ignored; there is only one memory pool on the host.
	 * @size:    bytes to allocate.
	 * @align:   required alignment, as accepted by posix_memalign().
	 * Returns the aligned allocation, or NULL if posix_memalign() fails.
	 */
	static void *local_alloc(unsigned int chip_id,
				 size_t size, size_t align)
	{
		void *mem = NULL;

		(void)chip_id;
		return posix_memalign(&mem, align, size) == 0 ? mem : NULL;
	}

	struct dt_node;
	extern struct dt_node *opal_node;

	#include "../trace.c"

	#include "../external/trace/trace.c"
	static struct trace_reader trace_readers[CPUS];
	struct trace_reader *my_trace_reader;
	#include "../device.c"

	/* Dummy section-boundary symbols; presumably referenced by the included sources -- TODO confirm. */
	char __rodata_start[1], __rodata_end[1];
	/* Device-tree node; main() points it at a root node named "opal". */
	struct dt_node *opal_node;
	/* Every mask bit set; presumably enables all trace types -- TODO confirm against trace.c. */
	struct debug_descriptor debug_descriptor = {
	.trace_mask = -1
	};

	/* Stub: whatever key is asked for, this harness reports no value. */
	const char *nvram_query_safe(const char *key __unused)
	{
		return NULL;
	}

	void lock_caller(struct lock l, const char caller)
	{
	(void)caller;
	assert(!l->lock_val);
	l->lock_val = 1;
	}

	/* Stub lock release: asserts the lock is held, then marks it free. */
	void unlock(struct lock *l)
	{
		assert(l->lock_val != 0);
		l->lock_val = 0;
	}

	/* The fake CPU the current process is acting as. */
	struct cpu_thread *my_fake_cpu;

	/* Report whichever fake CPU has been selected via my_fake_cpu. */
	static struct cpu_thread *this_cpu(void)
	{
		struct cpu_thread *self = my_fake_cpu;

		return self;
	}

	#include <sys/mman.h>
	/* Records logged by each child; far fewer when running under valgrind. */
	#define PER_CHILD_TRACES ((RUNNING_ON_VALGRIND) ? (1024*16) : (1024*1024))

	/*
	 * Body of one forked writer: log PER_CHILD_TRACES header-only records
	 * as fake CPU @id, then a final type-0x70 record, and exit.
	 *
	 * Timestamps are i * CPUS + id, so timestamp % CPUS identifies the
	 * writer.  Never returns.
	 */
	static void write_trace_entries(int id)
	{
		unsigned int i;
		union trace trace;

		timestamp = id;
		for (i = 0; i < PER_CHILD_TRACES; i++) {
			timestamp = i * CPUS + id;
			assert(sizeof(trace.hdr) % 8 == 0);
			/* First child never repeats, second repeats once, etc. */
			trace_add(&trace, 3 + ((i / (id + 1)) % 0x40),
				  sizeof(trace.hdr));
		}

		/* Final entry has special type, so parent knows it's over. */
		trace_add(&trace, 0x70, sizeof(trace.hdr));
		exit(0);
	}

	/* True only when every one of the CPUS entries in @done is set. */
	static bool all_done(const bool done[])
	{
		const bool *flag = done;
		const bool *end = done + CPUS;

		while (flag != end) {
			if (!*flag)
				return false;
			flag++;
		}
		return true;
	}

	/*
	 * Stress the trace buffers with real concurrency: CPUS forked writers
	 * fill per-CPU buffers that live in one shared anonymous mapping while
	 * this process drains them and tallies what it reads.
	 *
	 * Each child logs PER_CHILD_TRACES records followed by a type-0x70
	 * record that tells the reader the child has finished.  Setup failures
	 * (mmap, fork), malformed records and abnormal child exits are all
	 * reported through assert().
	 */
	static void test_parallel(void)
	{
		unsigned int counts[CPUS] = { 0 }, overflows[CPUS] = { 0 };
		unsigned int repeats[CPUS] = { 0 };
		bool done[CPUS] = { false };
		size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace);
		size_t pagesz = getpagesize();
		size_t map_len;
		unsigned int i, cpu;
		unsigned int last = 0;
		void *p;

		/* Use a shared mmap to test actual parallel buffers. */
		map_len = (CPUS * len + pagesz - 1) & ~(pagesz - 1);
		p = mmap(NULL, map_len, PROT_READ|PROT_WRITE,
			 MAP_ANONYMOUS|MAP_SHARED, -1, 0);
		assert(p != MAP_FAILED);

		for (i = 0; i < CPUS; i++) {
			fake_cpus[i].trace = (void *)((char *)p + i * len);
			fake_cpus[i].trace->tb.buf_size = cpu_to_be64(TBUF_SZ);
			fake_cpus[i].trace->tb.max_size = cpu_to_be32(sizeof(union trace));
			fake_cpus[i].is_secondary = false;
			memset(&trace_readers[i], 0, sizeof(struct trace_reader));
			trace_readers[i].tb = &fake_cpus[i].trace->tb;
		}

		for (i = 0; i < CPUS; i++) {
			pid_t pid = fork();

			/* A missing writer would leave the reader loop below spinning forever. */
			assert(pid >= 0);
			if (pid == 0) {
				/* Child: never returns. */
				my_fake_cpu = &fake_cpus[i];
				write_trace_entries(i);
			}
		}

		while (!all_done(done)) {
			union trace t;

			/* Poll the readers round-robin, starting with the last busy one. */
			for (i = 0; i < CPUS; i++) {
				if (trace_get(&t, &trace_readers[(i + last) % CPUS]))
					break;
			}

			/* Nothing to read anywhere: let the writers run. */
			if (i == CPUS) {
				sched_yield();
				continue;
			}
			i = (i + last) % CPUS;
			last = i;

			if (t.hdr.type == TRACE_OVERFLOW) {
				/* Conveniently, each record is 16 bytes here. */
				assert(be64_to_cpu(t.overflow.bytes_missed) % 16 == 0);
				overflows[i] += be64_to_cpu(t.overflow.bytes_missed) / 16;
				continue;
			}

			cpu = be16_to_cpu(t.hdr.cpu);
			assert(cpu < CPUS);
			assert(!done[cpu]);
			/* Writers stamp records with n * CPUS + id. */
			assert(be64_to_cpu(t.hdr.timestamp) % CPUS == cpu);
			if (t.hdr.type == TRACE_REPEAT) {
				assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat));
				assert(be16_to_cpu(t.repeat.num) != 0);
				/* Child N logs each type N+1 times in a row: at most N repeats. */
				assert(be16_to_cpu(t.repeat.num) <= cpu);
				repeats[cpu] += be16_to_cpu(t.repeat.num);
			} else if (t.hdr.type == 0x70) {
				done[cpu] = true;
			} else {
				counts[cpu]++;
			}
		}

		/* Gather children, insisting that each one exited cleanly. */
		for (i = 0; i < CPUS; i++) {
			int status;
			pid_t pid = wait(&status);

			assert(pid > 0);
			assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
		}

		for (i = 0; i < CPUS; i++) {
			printf("Child %u: %u produced, %u overflows, %llu total\n", i,
			       counts[i], overflows[i],
			       (unsigned long long)be64_to_cpu(fake_cpus[i].trace->tb.end));
			assert(counts[i] + repeats[i] <= PER_CHILD_TRACES);
		}
		/* Child 0 never repeats. */
		assert(repeats[0] == 0);
		assert(counts[0] + overflows[0] == PER_CHILD_TRACES);

		/*
		 * FIXME: Other children have some fuzz, since overflows may
		 * include repeat record we already read.  And odd-numbered
		 * overflows may include more repeat records than normal
		 * records (they alternate).
		 */

		/* Release the shared mapping and drop the pointers into it. */
		for (i = 0; i < CPUS; i++) {
			trace_readers[i].tb = NULL;
			fake_cpus[i].trace = NULL;
		}
		munmap(p, map_len);
	}

	/*
	 * Single-process checks of the trace buffer, followed by the forked
	 * stress test in test_parallel().
	 *
	 * In order: empty buffers, one record, wrap-around with an overflow
	 * marker, large records, repeat coalescing, and repeats added while the
	 * reader keeps up.  Every expectation is an assert(); returns 0 if all
	 * of them hold.
	 */
	int main(void)
	{
	union trace minimal;
	union trace large;
	union trace trace;
	unsigned int i, j;

	/* Build an "opal" root with firmware/exports beneath it before initialising the buffers. */
	opal_node = dt_new_root("opal");
	dt_new(dt_new(opal_node, "firmware"), "exports");
	/* Odd-numbered CPUs are secondaries of the even-numbered CPU below them. */
	for (i = 0; i < CPUS; i++) {
	fake_cpus[i].server_no = i;
	fake_cpus[i].pir = i;
	fake_cpus[i].is_secondary = (i & 0x1);
	fake_cpus[i].primary = &fake_cpus[i & ~0x1];
	}
	/* All sequential tests write as CPU 0 and read through reader 0. */
	my_fake_cpu = &fake_cpus[0];
	my_trace_reader = &trace_readers[0];
	init_trace_buffers();

	/* Freshly initialised buffers must read as empty. */
	for (i = 0; i < CPUS; i++) {
	trace_readers[i].tb = &fake_cpus[i].trace->tb;
	assert(trace_empty(&trace_readers[i]));
	assert(!trace_get(&trace, &trace_readers[i]));
	}

	/* One header-only record in, the same record out. */
	assert(sizeof(trace.hdr) % 8 == 0);
	timestamp = 1;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(be64_to_cpu(trace.hdr.timestamp) == timestamp);

	/* Make it wrap once. */
	/* Types alternate 99/100 so consecutive records are never identical. */
	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) {
	timestamp = i;
	trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr));
	}

	assert(trace_get(&trace, my_trace_reader));
	/* First one must be overflow marker. */
	assert(trace.hdr.type == TRACE_OVERFLOW);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow));
	/* Exactly one record (the one written at timestamp 0) is expected to be lost. */
	assert(be64_to_cpu(trace.overflow.bytes_missed) == minimal.hdr.len_div_8 * 8);

	/* The survivors come back in order, starting at timestamp 1. */
	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) {
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(be64_to_cpu(trace.hdr.timestamp) == i+1);
	assert(trace.hdr.type == 99 + ((i+1)%2));
	}
	assert(!trace_get(&trace, my_trace_reader));

	/* Now put in some weird-length ones, to test overlap.
	* Last power of 2, minus 8. */
	/*
	 * j ends as the smallest exponent with (1 << j) >= sizeof(large).
	 * NOTE(review): the length actually passed is 1 << (j-1); the
	 * "minus 8" in the comment above is not visible in this code.
	 */
	for (j = 0; (1 << j) < sizeof(large); j++);
	for (i = 0; i < TBUF_SZ; i++) {
	timestamp = i;
	trace_add(&large, 100 + (i%2), (1 << (j-1)));
	}
	/* Expect an overflow marker, then records with consecutive timestamps. */
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_OVERFLOW);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.len_div_8 == large.hdr.len_div_8);
	i = be64_to_cpu(trace.hdr.timestamp);
	while (trace_get(&trace, my_trace_reader))
	assert(be64_to_cpu(trace.hdr.timestamp) == ++i);

	/* Test repeats. */
	/* 65538 identical type-100 records, then two identical type-101 records. */
	for (i = 0; i < 65538; i++) {
	timestamp = i;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	}
	timestamp = i;
	trace_add(&minimal, 101, sizeof(trace.hdr));
	timestamp = i+1;
	trace_add(&minimal, 101, sizeof(trace.hdr));

	/* Original type-100 record at timestamp 0 ... */
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.timestamp == 0);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);
	/* ... followed by a repeat record whose count is expected to stop at 65535. */
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 65535);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65535);
	/* The next identical record is expected to be stored in full again ... */
	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 65536);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);
	/* ... with the last one folded into a new repeat record. */
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 1);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65537);

	/* The type-101 pair: one full record plus a repeat of one. */
	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 65538);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 101);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 1);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65539);

	/* Now, test adding repeat while we're reading... */
	timestamp = 0;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 0);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);

	/*
	 * The reader consumes each identical record as soon as it is added,
	 * so every read is expected to yield a repeat of exactly one, except
	 * when i is a multiple of 65536, where a full record is expected.
	 */
	for (i = 1; i < TBUF_SZ; i++) {
	timestamp = i;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	assert(trace_get(&trace, my_trace_reader));
	if (i % 65536 == 0) {
	assert(trace.hdr.type == 100);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	} else {
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 1);
	}
	/*
	 * Checked through the repeat view for both record kinds;
	 * presumably hdr and repeat keep the timestamp at the same
	 * offset -- TODO confirm against the trace type definitions.
	 */
	assert(be64_to_cpu(trace.repeat.timestamp) == i);
	assert(!trace_get(&trace, my_trace_reader));
	}

	/* Only non-secondary CPUs have their buffer freed here. */
	for (i = 0; i < CPUS; i++)
	if (!fake_cpus[i].is_secondary)
	free(fake_cpus[i].trace);

	test_parallel();

	return 0;
	}