| /* |
| * QEMU model of the Milkymist programmable FPU. |
| * |
| * Copyright (c) 2010 Michael Walle <michael@walle.cc> |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
| * |
| * |
| * Specification available at: |
| * http://www.milkymist.org/socdoc/pfpu.pdf |
| * |
| */ |
| |
| #include "hw/hw.h" |
| #include "hw/sysbus.h" |
| #include "trace.h" |
| #include "qemu/log.h" |
| #include "qemu/error-report.h" |
| #include <math.h> |
| |
/* Uncomment to compile in the per-instruction qemu_log() tracing. */
/* #define TRACE_EXEC */

/*
 * D_EXEC(x) expands to its argument when TRACE_EXEC is defined and to
 * nothing otherwise.
 */
#if defined(TRACE_EXEC)
#define D_EXEC(x) x
#else
#define D_EXEC(x)
#endif
| |
/*
 * Control/status register indices.  These are word indices: the MMIO
 * handlers shift the byte offset right by two before matching them.
 */
enum {
    R_CTL         = 0,   /* writing CTL_START_BUSY starts a run */
    R_MESHBASE    = 1,   /* base address used for VECTOUT DMA writes */
    R_HMESHLAST   = 2,   /* last horizontal vertex index (inclusive) */
    R_VMESHLAST   = 3,   /* last vertical vertex index (inclusive) */
    R_CODEPAGE    = 4,   /* selects the 512-word microcode page seen via MMIO */
    R_VERTICES    = 5,   /* set at the end of a run */
    R_COLLISIONS  = 6,   /* plain storage in this model */
    R_STRAYWRITES = 7,   /* plain storage in this model */
    R_LASTDMA     = 8,   /* address of the most recent VECTOUT DMA write */
    R_PC          = 9,   /* microcode program counter */
    R_DREGBASE    = 10,  /* plain storage in this model */
    R_CODEBASE    = 11,  /* plain storage in this model */
    R_MAX         = 12   /* number of registers */
};
| |
/* Bits of the R_CTL register. */
enum {
    CTL_START_BUSY = 0x1,   /* bit 0: a write with this bit set starts a run */
};
| |
/* Microcode opcodes (4-bit field of the instruction word). */
enum {
    OP_NOP     = 0,    /* no operation */
    OP_FADD    = 1,    /* a + b */
    OP_FSUB    = 2,    /* a - b */
    OP_FMUL    = 3,    /* a * b */
    OP_FABS    = 4,    /* |a| */
    OP_F2I     = 5,    /* float to signed integer */
    OP_I2F     = 6,    /* signed integer to float */
    OP_VECTOUT = 7,    /* DMA a and b to the mesh, ends the program */
    OP_SIN     = 8,    /* sine of a scaled integer argument */
    OP_COS     = 9,    /* cosine of a scaled integer argument */
    OP_ABOVE   = 10,   /* 1.0 if a > b, else 0.0 */
    OP_EQUAL   = 11,   /* 1.0 if a == b, else 0.0 */
    OP_COPY    = 12,   /* raw copy of a */
    OP_IF      = 13,   /* a if the flags register is non-zero, else b */
    OP_TSIGN   = 14,   /* -a if b < 0, else a */
    OP_QUAKE   = 15,   /* 0x5f3759df - (a >> 1) on the raw bits of a */
};
| |
/* General purpose registers with a fixed role in this model. */
enum {
    GPR_X,          /* 0: current horizontal vertex index */
    GPR_Y,          /* 1: current vertical vertex index */
    GPR_FLAGS,      /* 2: condition consumed by OP_IF */
};
| |
/*
 * Result latency of each opcode, counted in instruction slots.  A result
 * is placed (latency - 1) slots ahead in the output queue after the queue
 * has been advanced for the current instruction.
 */
enum {
    /* arithmetic */
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,

    /* conversions */
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,

    /* output (produces no queued result) */
    LATENCY_VECTOUT = 0,

    /* trigonometry */
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,

    /* comparisons, selection and bit tricks */
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    /* depth of the output queue; must cover the largest latency above */
    MAX_LATENCY     = 7
};
| |
/*
 * Register-file and microcode windows, expressed as inclusive word
 * indices into the MMIO region.
 */
#define GPR_BEGIN        0x100
#define GPR_END          0x17f
#define MICROCODE_BEGIN  0x200
#define MICROCODE_END    0x3ff

/* Total number of microcode words held by the model. */
#define MICROCODE_WORDS  2048

/*
 * Read the object `val` (which must be an lvalue) as if it had type `type`,
 * without any value conversion.
 * NOTE(review): this is a pointer-cast type pun; it presumably relies on the
 * build disabling strict-aliasing optimisations - confirm.
 */
#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
| |
#ifdef TRACE_EXEC
/* Mnemonics for the execution trace, indexed by opcode. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
| |
| struct MilkymistPFPUState { |
| SysBusDevice busdev; |
| MemoryRegion regs_region; |
| CharDriverState *chr; |
| qemu_irq irq; |
| |
| uint32_t regs[R_MAX]; |
| uint32_t gp_regs[128]; |
| uint32_t microcode[MICROCODE_WORDS]; |
| |
| int output_queue_pos; |
| uint32_t output_queue[MAX_LATENCY]; |
| }; |
| typedef struct MilkymistPFPUState MilkymistPFPUState; |
| |
| static inline hwaddr |
| get_dma_address(uint32_t base, uint32_t x, uint32_t y) |
| { |
| return base + 8 * (128 * y + x); |
| } |
| |
| static inline void |
| output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos) |
| { |
| s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val; |
| } |
| |
| static inline uint32_t |
| output_queue_remove(MilkymistPFPUState *s) |
| { |
| return s->output_queue[s->output_queue_pos]; |
| } |
| |
| static inline void |
| output_queue_advance(MilkymistPFPUState *s) |
| { |
| s->output_queue[s->output_queue_pos] = 0; |
| s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY; |
| } |
| |
| static int pfpu_decode_insn(MilkymistPFPUState *s) |
| { |
| uint32_t pc = s->regs[R_PC]; |
| uint32_t insn = s->microcode[pc]; |
| uint32_t reg_a = (insn >> 18) & 0x7f; |
| uint32_t reg_b = (insn >> 11) & 0x7f; |
| uint32_t op = (insn >> 7) & 0xf; |
| uint32_t reg_d = insn & 0x7f; |
| uint32_t r = 0; |
| int latency = 0; |
| |
| switch (op) { |
| case OP_NOP: |
| break; |
| case OP_FADD: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = a + b; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_FADD; |
| D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_FSUB: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = a - b; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_FSUB; |
| D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_FMUL: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = a * b; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_FMUL; |
| D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_FABS: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float t = fabsf(a); |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_FABS; |
| D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r)); |
| } break; |
| case OP_F2I: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| int32_t t = a; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_F2I; |
| D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r)); |
| } break; |
| case OP_I2F: |
| { |
| int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
| float t = a; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_I2F; |
| D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r)); |
| } break; |
| case OP_VECTOUT: |
| { |
| uint32_t a = cpu_to_be32(s->gp_regs[reg_a]); |
| uint32_t b = cpu_to_be32(s->gp_regs[reg_b]); |
| hwaddr dma_ptr = |
| get_dma_address(s->regs[R_MESHBASE], |
| s->gp_regs[GPR_X], s->gp_regs[GPR_Y]); |
| cpu_physical_memory_write(dma_ptr, &a, 4); |
| cpu_physical_memory_write(dma_ptr + 4, &b, 4); |
| s->regs[R_LASTDMA] = dma_ptr + 4; |
| D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr)); |
| trace_milkymist_pfpu_vectout(a, b, dma_ptr); |
| } break; |
| case OP_SIN: |
| { |
| int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
| float t = sinf(a * (1.0f / (M_PI * 4096.0f))); |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_SIN; |
| D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r)); |
| } break; |
| case OP_COS: |
| { |
| int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
| float t = cosf(a * (1.0f / (M_PI * 4096.0f))); |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_COS; |
| D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r)); |
| } break; |
| case OP_ABOVE: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = (a > b) ? 1.0f : 0.0f; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_ABOVE; |
| D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_EQUAL: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = (a == b) ? 1.0f : 0.0f; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_EQUAL; |
| D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_COPY: |
| { |
| r = s->gp_regs[reg_a]; |
| latency = LATENCY_COPY; |
| D_EXEC(qemu_log("COPY")); |
| } break; |
| case OP_IF: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| uint32_t f = s->gp_regs[GPR_FLAGS]; |
| float t = (f != 0) ? a : b; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_IF; |
| D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r)); |
| } break; |
| case OP_TSIGN: |
| { |
| float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
| float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
| float t = (b < 0) ? -a : a; |
| r = REINTERPRET_CAST(uint32_t, t); |
| latency = LATENCY_TSIGN; |
| D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); |
| } break; |
| case OP_QUAKE: |
| { |
| uint32_t a = s->gp_regs[reg_a]; |
| r = 0x5f3759df - (a >> 1); |
| latency = LATENCY_QUAKE; |
| D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r)); |
| } break; |
| |
| default: |
| error_report("milkymist_pfpu: unknown opcode %d", op); |
| break; |
| } |
| |
| if (!reg_d) { |
| D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n", |
| s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, |
| s->regs[R_PC] + latency)); |
| } else { |
| D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n", |
| s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, |
| s->regs[R_PC] + latency, reg_d)); |
| } |
| |
| if (op == OP_VECTOUT) { |
| return 0; |
| } |
| |
| /* store output for this cycle */ |
| if (reg_d) { |
| uint32_t val = output_queue_remove(s); |
| D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val)); |
| s->gp_regs[reg_d] = val; |
| } |
| |
| output_queue_advance(s); |
| |
| /* store op output */ |
| if (op != OP_NOP) { |
| output_queue_insert(s, r, latency-1); |
| } |
| |
| /* advance PC */ |
| s->regs[R_PC]++; |
| |
| return 1; |
| }; |
| |
| static void pfpu_start(MilkymistPFPUState *s) |
| { |
| int x, y; |
| int i; |
| |
| for (y = 0; y <= s->regs[R_VMESHLAST]; y++) { |
| for (x = 0; x <= s->regs[R_HMESHLAST]; x++) { |
| D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y)); |
| |
| /* set current position */ |
| s->gp_regs[GPR_X] = x; |
| s->gp_regs[GPR_Y] = y; |
| |
| /* run microcode on this position */ |
| i = 0; |
| while (pfpu_decode_insn(s)) { |
| /* decode at most MICROCODE_WORDS instructions */ |
| if (i++ >= MICROCODE_WORDS) { |
| error_report("milkymist_pfpu: too many instructions " |
| "executed in microcode. No VECTOUT?"); |
| break; |
| } |
| } |
| |
| /* reset pc for next run */ |
| s->regs[R_PC] = 0; |
| } |
| } |
| |
| s->regs[R_VERTICES] = x * y; |
| |
| trace_milkymist_pfpu_pulse_irq(); |
| qemu_irq_pulse(s->irq); |
| } |
| |
| static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr) |
| { |
| return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN; |
| } |
| |
| static uint64_t pfpu_read(void *opaque, hwaddr addr, |
| unsigned size) |
| { |
| MilkymistPFPUState *s = opaque; |
| uint32_t r = 0; |
| |
| addr >>= 2; |
| switch (addr) { |
| case R_CTL: |
| case R_MESHBASE: |
| case R_HMESHLAST: |
| case R_VMESHLAST: |
| case R_CODEPAGE: |
| case R_VERTICES: |
| case R_COLLISIONS: |
| case R_STRAYWRITES: |
| case R_LASTDMA: |
| case R_PC: |
| case R_DREGBASE: |
| case R_CODEBASE: |
| r = s->regs[addr]; |
| break; |
| case GPR_BEGIN ... GPR_END: |
| r = s->gp_regs[addr - GPR_BEGIN]; |
| break; |
| case MICROCODE_BEGIN ... MICROCODE_END: |
| r = s->microcode[get_microcode_address(s, addr)]; |
| break; |
| |
| default: |
| error_report("milkymist_pfpu: read access to unknown register 0x" |
| TARGET_FMT_plx, addr << 2); |
| break; |
| } |
| |
| trace_milkymist_pfpu_memory_read(addr << 2, r); |
| |
| return r; |
| } |
| |
| static void pfpu_write(void *opaque, hwaddr addr, uint64_t value, |
| unsigned size) |
| { |
| MilkymistPFPUState *s = opaque; |
| |
| trace_milkymist_pfpu_memory_write(addr, value); |
| |
| addr >>= 2; |
| switch (addr) { |
| case R_CTL: |
| if (value & CTL_START_BUSY) { |
| pfpu_start(s); |
| } |
| break; |
| case R_MESHBASE: |
| case R_HMESHLAST: |
| case R_VMESHLAST: |
| case R_CODEPAGE: |
| case R_VERTICES: |
| case R_COLLISIONS: |
| case R_STRAYWRITES: |
| case R_LASTDMA: |
| case R_PC: |
| case R_DREGBASE: |
| case R_CODEBASE: |
| s->regs[addr] = value; |
| break; |
| case GPR_BEGIN ... GPR_END: |
| s->gp_regs[addr - GPR_BEGIN] = value; |
| break; |
| case MICROCODE_BEGIN ... MICROCODE_END: |
| s->microcode[get_microcode_address(s, addr)] = value; |
| break; |
| |
| default: |
| error_report("milkymist_pfpu: write access to unknown register 0x" |
| TARGET_FMT_plx, addr << 2); |
| break; |
| } |
| } |
| |
| static const MemoryRegionOps pfpu_mmio_ops = { |
| .read = pfpu_read, |
| .write = pfpu_write, |
| .valid = { |
| .min_access_size = 4, |
| .max_access_size = 4, |
| }, |
| .endianness = DEVICE_NATIVE_ENDIAN, |
| }; |
| |
| static void milkymist_pfpu_reset(DeviceState *d) |
| { |
| MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev); |
| int i; |
| |
| for (i = 0; i < R_MAX; i++) { |
| s->regs[i] = 0; |
| } |
| for (i = 0; i < 128; i++) { |
| s->gp_regs[i] = 0; |
| } |
| for (i = 0; i < MICROCODE_WORDS; i++) { |
| s->microcode[i] = 0; |
| } |
| s->output_queue_pos = 0; |
| for (i = 0; i < MAX_LATENCY; i++) { |
| s->output_queue[i] = 0; |
| } |
| } |
| |
| static int milkymist_pfpu_init(SysBusDevice *dev) |
| { |
| MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev); |
| |
| sysbus_init_irq(dev, &s->irq); |
| |
| memory_region_init_io(&s->regs_region, &pfpu_mmio_ops, s, |
| "milkymist-pfpu", MICROCODE_END * 4); |
| sysbus_init_mmio(dev, &s->regs_region); |
| |
| return 0; |
| } |
| |
| static const VMStateDescription vmstate_milkymist_pfpu = { |
| .name = "milkymist-pfpu", |
| .version_id = 1, |
| .minimum_version_id = 1, |
| .minimum_version_id_old = 1, |
| .fields = (VMStateField[]) { |
| VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX), |
| VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128), |
| VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS), |
| VMSTATE_INT32(output_queue_pos, MilkymistPFPUState), |
| VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY), |
| VMSTATE_END_OF_LIST() |
| } |
| }; |
| |
| static void milkymist_pfpu_class_init(ObjectClass *klass, void *data) |
| { |
| DeviceClass *dc = DEVICE_CLASS(klass); |
| SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); |
| |
| k->init = milkymist_pfpu_init; |
| dc->reset = milkymist_pfpu_reset; |
| dc->vmsd = &vmstate_milkymist_pfpu; |
| } |
| |
| static const TypeInfo milkymist_pfpu_info = { |
| .name = "milkymist-pfpu", |
| .parent = TYPE_SYS_BUS_DEVICE, |
| .instance_size = sizeof(MilkymistPFPUState), |
| .class_init = milkymist_pfpu_class_init, |
| }; |
| |
| static void milkymist_pfpu_register_types(void) |
| { |
| type_register_static(&milkymist_pfpu_info); |
| } |
| |
| type_init(milkymist_pfpu_register_types) |