sparc64 fixes (Blue Swirl)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1514 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/Makefile.target b/Makefile.target
index 74b5da8..c5a5152 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -297,7 +297,10 @@
 endif
 ifeq ($(TARGET_BASE_ARCH), sparc)
 ifeq ($(TARGET_ARCH), sparc64)
-VL_OBJS+= sun4u.o m48t08.o magic-load.o slavio_serial.o
+VL_OBJS+= sun4u.o ide.o ne2000.o pckbd.o vga.o
+VL_OBJS+= fdc.o mc146818rtc.o serial.o m48t59.o
+VL_OBJS+= cirrus_vga.o parallel.o
+VL_OBJS+= magic-load.o
 else
 VL_OBJS+= sun4m.o tcx.o lance.o iommu.o m48t08.o magic-load.o slavio_intctl.o slavio_timer.o slavio_serial.o slavio_misc.o fdc.o esp.o
 endif
diff --git a/configure b/configure
index 76b4bfe..c18a9d1 100755
--- a/configure
+++ b/configure
@@ -153,6 +153,8 @@
   ;;
   --cc=*) cc=`echo $opt | cut -d '=' -f 2`
   ;;
+  --host-cc=*) host_cc=`echo $opt | cut -d '=' -f 2`
+  ;;
   --make=*) make=`echo $opt | cut -d '=' -f 2`
   ;;
   --extra-cflags=*) CFLAGS="${opt#--extra-cflags=}"
@@ -339,6 +341,7 @@
 echo "  --source-path=PATH       path of source code [$source_path]"
 echo "  --cross-prefix=PREFIX    use PREFIX for compile tools [$cross_prefix]"
 echo "  --cc=CC                  use C compiler CC [$cc]"
+echo "  --host-cc=CC             use C compiler CC [$host_cc] for dyngen etc."
 echo "  --make=MAKE              use specified make [$make]"
 echo "  --static                 enable static build [$static]"
 echo "  --enable-mingw32         enable Win32 cross compilation with mingw32"
@@ -420,6 +423,7 @@
 fi
 echo "Source path       $source_path"
 echo "C compiler        $cc"
+echo "Host C compiler   $host_cc"
 echo "make              $make"
 echo "host CPU          $cpu"
 echo "host big endian   $bigendian"
diff --git a/hw/magic-load.c b/hw/magic-load.c
index 63942c6..d5c098f 100644
--- a/hw/magic-load.c
+++ b/hw/magic-load.c
@@ -139,7 +139,7 @@
 
 	if (find_phdr64(&ehdr64, fd, &phdr, PT_LOAD))
 	    goto error;
-	retval = read_program64(fd, &phdr, addr, ehdr64.e_entry);
+	retval = read_program64(fd, &phdr, phys_ram_base + ehdr64.e_entry, ehdr64.e_entry);
 	if (retval < 0)
 	    goto error;
 	load_symbols64(&ehdr64, fd);
diff --git a/hw/pci.c b/hw/pci.c
index a5ecbf1..efca2cd 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1291,6 +1291,253 @@
     return s;
 }
 
+/* Ultrasparc APB PCI host */
+static void pci_apb_config_writel (void *opaque, target_phys_addr_t addr,
+                                         uint32_t val)
+{
+    PCIBus *s = opaque;
+    int i;
+    /* Latch a config address: find the lowest set bit of val in 11..31 (i is 32 if none). */
+    for (i = 11; i < 32; i++) {
+        if ((val & (1U << i)) != 0) /* unsigned: (1 << 31) would overflow int */
+            break;
+    }
+    s->config_reg = 0x80000000 | (1 << 16) | (val & 0x7FC) | (i << 11);
+}
+
+static uint32_t pci_apb_config_readl (void *opaque,
+                                            target_phys_addr_t addr)
+{
+    PCIBus *s = opaque;
+    uint32_t val;
+    int devfn;
+    /* Rebuild a value from config_reg: bits 8..15 are treated as devfn, bits 2..7 pass through. */
+    devfn = (s->config_reg >> 8) & 0xFF;
+    val = (1U << (devfn >> 3)) | ((devfn & 0x07) << 8) | (s->config_reg & 0xFC); /* 1U: shift count can reach 31 */
+    return val;
+}
+
+static CPUWriteMemoryFunc *pci_apb_config_write[] = { /* write handlers for the config address register */
+    &pci_apb_config_writel, /* all three slots share the one 32-bit handler */
+    &pci_apb_config_writel,
+    &pci_apb_config_writel,
+};
+
+static CPUReadMemoryFunc *pci_apb_config_read[] = { /* read handlers for the config address register */
+    &pci_apb_config_readl, /* all three slots share the one 32-bit handler */
+    &pci_apb_config_readl,
+    &pci_apb_config_readl,
+};
+
+static void apb_config_writel (void *opaque, target_phys_addr_t addr,
+			       uint32_t val)
+{
+    //PCIBus *s = opaque;
+    /* Stub: the register is selected by addr bits 0..5, but every write is discarded. */
+    switch (addr & 0x3f) {
+    case 0x00: // Control/Status
+    case 0x10: // AFSR
+    case 0x18: // AFAR
+    case 0x20: // Diagnostic
+    case 0x28: // Target address space
+	// XXX: register writes are not implemented yet
+    default:
+	break;
+    }
+}
+
+static uint32_t apb_config_readl (void *opaque,
+				  target_phys_addr_t addr)
+{
+    //PCIBus *s = opaque;
+    uint32_t val;
+    /* Stub: the register is selected by addr bits 0..5, but every read returns 0. */
+    switch (addr & 0x3f) {
+    case 0x00: // Control/Status
+    case 0x10: // AFSR
+    case 0x18: // AFAR
+    case 0x20: // Diagnostic
+    case 0x28: // Target address space
+	// XXX: register reads are not implemented yet
+    default:
+	val = 0;
+	break;
+    }
+    return val;
+}
+
+static CPUWriteMemoryFunc *apb_config_write[] = { /* write handlers for the APB register stub */
+    &apb_config_writel, /* all three slots share the one handler */
+    &apb_config_writel,
+    &apb_config_writel,
+};
+
+static CPUReadMemoryFunc *apb_config_read[] = { /* read handlers for the APB register stub */
+    &apb_config_readl, /* all three slots share the one handler */
+    &apb_config_readl,
+    &apb_config_readl,
+};
+
+static void pci_apb_writeb (void *opaque, target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    PCIBus *s = opaque;
+    /* 1-byte write through pci_data_write(), keeping only addr bits 0..2. */
+    pci_data_write(s, addr & 7, val, 1);
+}
+
+static void pci_apb_writew (void *opaque, target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    PCIBus *s = opaque;
+    /* 2-byte write through pci_data_write(), keeping only addr bits 0..2. */
+    pci_data_write(s, addr & 7, val, 2);
+}
+
+static void pci_apb_writel (void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    PCIBus *s = opaque;
+    /* 4-byte write through pci_data_write(), keeping only addr bits 0..2. */
+    pci_data_write(s, addr & 7, val, 4);
+}
+
+static uint32_t pci_apb_readb (void *opaque, target_phys_addr_t addr)
+{
+    PCIBus *s = opaque;
+    uint32_t val;
+    /* 1-byte read through pci_data_read(), keeping only addr bits 0..2. */
+    val = pci_data_read(s, addr & 7, 1);
+    return val;
+}
+
+static uint32_t pci_apb_readw (void *opaque, target_phys_addr_t addr)
+{
+    PCIBus *s = opaque;
+    uint32_t val;
+    /* 2-byte read through pci_data_read(), keeping only addr bits 0..2. */
+    val = pci_data_read(s, addr & 7, 2);
+    return val;
+}
+
+static uint32_t pci_apb_readl (void *opaque, target_phys_addr_t addr)
+{
+    PCIBus *s = opaque;
+    uint32_t val;
+    /* 4-byte read; addr is masked to bits 0..2 like the other pci_apb_* data accessors. */
+    val = pci_data_read(s, addr & 7, 4);
+    return val;
+}
+
+static CPUWriteMemoryFunc *pci_apb_write[] = { /* data-window write handlers, in 1-, 2-, 4-byte order */
+    &pci_apb_writeb,
+    &pci_apb_writew,
+    &pci_apb_writel,
+};
+
+static CPUReadMemoryFunc *pci_apb_read[] = { /* data-window read handlers, in 1-, 2-, 4-byte order */
+    &pci_apb_readb,
+    &pci_apb_readw,
+    &pci_apb_readl,
+};
+
+static void pci_apb_iowriteb (void *opaque, target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    cpu_outb(NULL, addr & 0xffff, val); /* port number = low 16 bits of addr; opaque is unused */
+}
+
+static void pci_apb_iowritew (void *opaque, target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    cpu_outw(NULL, addr & 0xffff, val); /* port number = low 16 bits of addr; opaque is unused */
+}
+
+static void pci_apb_iowritel (void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    cpu_outl(NULL, addr & 0xffff, val); /* port number = low 16 bits of addr; opaque is unused */
+}
+
+static uint32_t pci_apb_ioreadb (void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val;
+    /* Byte read from the I/O port given by the low 16 bits of addr. */
+    val = cpu_inb(NULL, addr & 0xffff);
+    return val;
+}
+
+static uint32_t pci_apb_ioreadw (void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val;
+    /* 16-bit read from the I/O port given by the low 16 bits of addr. */
+    val = cpu_inw(NULL, addr & 0xffff);
+    return val;
+}
+
+static uint32_t pci_apb_ioreadl (void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val;
+    /* 32-bit read from the I/O port given by the low 16 bits of addr. */
+    val = cpu_inl(NULL, addr & 0xffff);
+    return val;
+}
+
+static CPUWriteMemoryFunc *pci_apb_iowrite[] = { /* I/O-port window write handlers, in 1-, 2-, 4-byte order */
+    &pci_apb_iowriteb,
+    &pci_apb_iowritew,
+    &pci_apb_iowritel,
+};
+
+static CPUReadMemoryFunc *pci_apb_ioread[] = { /* I/O-port window read handlers, in 1-, 2-, 4-byte order */
+    &pci_apb_ioreadb,
+    &pci_apb_ioreadw,
+    &pci_apb_ioreadl,
+};
+
+PCIBus *pci_apb_init(target_ulong special_base, target_ulong mem_base)
+{
+    PCIBus *s;
+    PCIDevice *d;
+    int pci_mem_config, pci_mem_data, apb_config, pci_ioport;
+    /* Create the APB bus, map its four register windows, and register the bridge device; returns the bus. */
+    /* Ultrasparc APB main bus */
+    s = pci_register_bus();
+    s->set_irq = pci_set_irq_simple;
+    /* One I/O-memory handle per window; each is passed the bus as its opaque pointer. */
+    pci_mem_config = cpu_register_io_memory(0, pci_apb_config_read,
+                                            pci_apb_config_write, s);
+    apb_config = cpu_register_io_memory(0, apb_config_read,
+					apb_config_write, s);
+    pci_mem_data = cpu_register_io_memory(0, pci_apb_read,
+                                          pci_apb_write, s);
+    pci_ioport = cpu_register_io_memory(0, pci_apb_ioread,
+                                          pci_apb_iowrite, s);
+    /* Three windows sit at fixed offsets from special_base; the data window starts at mem_base. */
+    cpu_register_physical_memory(special_base + 0x2000ULL, 0x40, apb_config);
+    cpu_register_physical_memory(special_base + 0x1000000ULL, 0x10, pci_mem_config);
+    cpu_register_physical_memory(special_base + 0x2000000ULL, 0x10000, pci_ioport);
+    cpu_register_physical_memory(mem_base, 0x10000000, pci_mem_data); // XXX size should be 4G-prom
+    /* Register the bridge itself (devfn -1, no config callbacks) and fill in its config header. */
+    d = pci_register_device(s, "Advanced PCI Bus", sizeof(PCIDevice), 
+                            -1, NULL, NULL);
+    d->config[0x00] = 0x8e; // vendor_id : Sun
+    d->config[0x01] = 0x10;
+    d->config[0x02] = 0x00; // device_id
+    d->config[0x03] = 0xa0;
+    d->config[0x04] = 0x06; // command = bus master, pci mem
+    d->config[0x05] = 0x00;
+    d->config[0x06] = 0xa0; // status = fast back-to-back, 66MHz, no error
+    d->config[0x07] = 0x03; // status = medium devsel
+    d->config[0x08] = 0x00; // revision
+    d->config[0x09] = 0x00; // programming i/f
+    d->config[0x0A] = 0x00; // class_sub = pci host
+    d->config[0x0B] = 0x06; // class_base = PCI_bridge
+    d->config[0x0D] = 0x10; // latency_timer
+    d->config[0x0E] = 0x00; // header_type
+    return s;
+}
+
 /***********************************************************/
 /* generic PCI irq support */
 
diff --git a/hw/sun4u.c b/hw/sun4u.c
index af15464..9c89453 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -22,23 +22,18 @@
  * THE SOFTWARE.
  */
 #include "vl.h"
-#include "m48t08.h"
+#include "m48t59.h"
 
-#define KERNEL_LOAD_ADDR     0x00004000
-#define CMDLINE_ADDR         0x007ff000
-#define INITRD_LOAD_ADDR     0x00800000
-#define PROM_ADDR	     0xffd00000
+#define KERNEL_LOAD_ADDR     0x00404000
+#define CMDLINE_ADDR         0x003ff000
+#define INITRD_LOAD_ADDR     0x00300000
+#define PROM_ADDR	     0x1fff0000000ULL
+#define APB_SPECIAL_BASE     0x1fe00000000ULL
+#define APB_MEM_BASE	     0x1ff00000000ULL
+#define VGA_BASE	     (APB_MEM_BASE + 0x400000ULL)
 #define PROM_FILENAMEB	     "proll-sparc64.bin"
 #define PROM_FILENAMEE	     "proll-sparc64.elf"
-#define PHYS_JJ_EEPROM	0x71200000	/* m48t08 */
-#define PHYS_JJ_IDPROM_OFF	0x1FD8
-#define PHYS_JJ_EEPROM_SIZE	0x2000
-// IRQs are not PIL ones, but master interrupt controller register
-// bits
-#define PHYS_JJ_MS_KBD	0x71000000	/* Mouse and keyboard */
-#define PHYS_JJ_MS_KBD_IRQ    14
-#define PHYS_JJ_SER	0x71100000	/* Serial */
-#define PHYS_JJ_SER_IRQ    15
+#define NVRAM_SIZE           0x2000
 
 /* TSC handling */
 
@@ -70,79 +65,170 @@
 {
 }
 
-static void nvram_set_word (m48t08_t *nvram, uint32_t addr, uint16_t value)
+/* NVRAM helpers */
+void NVRAM_set_byte (m48t59_t *nvram, uint32_t addr, uint8_t value)
 {
-    m48t08_write(nvram, addr++, (value >> 8) & 0xff);
-    m48t08_write(nvram, addr++, value & 0xff);
+    m48t59_set_addr(nvram, addr);
+    m48t59_write(nvram, value);
 }
 
-static void nvram_set_lword (m48t08_t *nvram, uint32_t addr, uint32_t value)
+uint8_t NVRAM_get_byte (m48t59_t *nvram, uint32_t addr)
 {
-    m48t08_write(nvram, addr++, value >> 24);
-    m48t08_write(nvram, addr++, (value >> 16) & 0xff);
-    m48t08_write(nvram, addr++, (value >> 8) & 0xff);
-    m48t08_write(nvram, addr++, value & 0xff);
+    m48t59_set_addr(nvram, addr);
+    return m48t59_read(nvram);
 }
 
-static void nvram_set_string (m48t08_t *nvram, uint32_t addr,
+void NVRAM_set_word (m48t59_t *nvram, uint32_t addr, uint16_t value)
+{
+    m48t59_set_addr(nvram, addr); /* store a 16-bit value as two bytes */
+    m48t59_write(nvram, value >> 8); /* high byte goes to addr */
+    m48t59_set_addr(nvram, addr + 1);
+    m48t59_write(nvram, value & 0xFF); /* low byte goes to addr + 1 */
+}
+
+uint16_t NVRAM_get_word (m48t59_t *nvram, uint32_t addr)
+{
+    uint16_t tmp;
+    /* Assemble a 16-bit value from two bytes: high byte at addr, low byte at addr + 1. */
+    m48t59_set_addr(nvram, addr);
+    tmp = m48t59_read(nvram) << 8;
+    m48t59_set_addr(nvram, addr + 1);
+    tmp |= m48t59_read(nvram);
+
+    return tmp;
+}
+
+void NVRAM_set_lword (m48t59_t *nvram, uint32_t addr, uint32_t value)
+{
+    m48t59_set_addr(nvram, addr); /* store a 32-bit value as four bytes */
+    m48t59_write(nvram, value >> 24); /* most significant byte goes to addr */
+    m48t59_set_addr(nvram, addr + 1);
+    m48t59_write(nvram, (value >> 16) & 0xFF);
+    m48t59_set_addr(nvram, addr + 2);
+    m48t59_write(nvram, (value >> 8) & 0xFF);
+    m48t59_set_addr(nvram, addr + 3);
+    m48t59_write(nvram, value & 0xFF); /* least significant byte goes to addr + 3 */
+}
+
+uint32_t NVRAM_get_lword (m48t59_t *nvram, uint32_t addr)
+{
+    uint32_t tmp;
+    /* Assemble a 32-bit value from four bytes, most significant byte at addr. */
+    m48t59_set_addr(nvram, addr);
+    tmp = m48t59_read(nvram) << 24;
+    m48t59_set_addr(nvram, addr + 1);
+    tmp |= m48t59_read(nvram) << 16;
+    m48t59_set_addr(nvram, addr + 2);
+    tmp |= m48t59_read(nvram) << 8;
+    m48t59_set_addr(nvram, addr + 3);
+    tmp |= m48t59_read(nvram);
+
+    return tmp;
+}
+
+void NVRAM_set_string (m48t59_t *nvram, uint32_t addr,
                        const unsigned char *str, uint32_t max)
 {
-    unsigned int i;
+    int i;
 
     for (i = 0; i < max && str[i] != '\0'; i++) {
-        m48t08_write(nvram, addr + i, str[i]);
+        m48t59_set_addr(nvram, addr + i);
+        m48t59_write(nvram, str[i]);
     }
-    m48t08_write(nvram, addr + max - 1, '\0');
+    m48t59_set_addr(nvram, addr + max - 1);
+    m48t59_write(nvram, '\0');
 }
 
-static m48t08_t *nvram;
+int NVRAM_get_string (m48t59_t *nvram, uint8_t *dst, uint16_t addr, int max)
+{
+    int i;
+    /* Copy a NUL-terminated string from NVRAM at addr into dst (max bytes); returns its length. */
+    memset(dst, 0, max);
+    for (i = 0; i < max - 1; i++) { /* stop one short so dst[max - 1] stays 0 from the memset */
+        dst[i] = NVRAM_get_byte(nvram, addr + i);
+        if (dst[i] == '\0')
+            break;
+    }
+
+    return i;
+}
+
+static uint16_t NVRAM_crc_update (uint16_t prev, uint16_t value)
+{
+    uint16_t tmp;
+    uint16_t pd, pd1, pd2;
+    /* Fold one 16-bit value into the running checksum prev and return the new checksum. */
+    tmp = prev >> 8;
+    pd = prev ^ value;
+    pd1 = pd & 0x000F; /* low nibble of prev ^ value */
+    pd2 = ((pd >> 4) & 0x000F) ^ pd1; /* second nibble, mixed with the low one */
+    tmp ^= (pd1 << 3) | (pd1 << 8);
+    tmp ^= pd2 | (pd2 << 7) | (pd2 << 12);
+
+    return tmp;
+}
+
+uint16_t NVRAM_compute_crc (m48t59_t *nvram, uint32_t start, uint32_t count)
+{
+    uint32_t i;
+    uint16_t crc = 0xFFFF; /* initial checksum value */
+    int odd;
+    /* Checksum count bytes of NVRAM from start; an odd trailing byte is folded in as the high byte. */
+    odd = count & 1;
+    count &= ~1;
+    for (i = 0; i != count; i++) { /* NOTE(review): reads a 16-bit word at every byte offset (stride 1) - confirm the consumer expects this */
+	crc = NVRAM_crc_update(crc, NVRAM_get_word(nvram, start + i));
+    }
+    if (odd) {
+	crc = NVRAM_crc_update(crc, NVRAM_get_byte(nvram, start + i) << 8);
+    }
+
+    return crc;
+}
 
 extern int nographic;
 
-static void nvram_init(m48t08_t *nvram, uint8_t *macaddr, const char *cmdline,
-		       int boot_device, uint32_t RAM_size,
-		       uint32_t kernel_size,
-		       int width, int height, int depth)
+int sun4u_NVRAM_set_params (m48t59_t *nvram, uint16_t NVRAM_size,
+                          const unsigned char *arch,
+                          uint32_t RAM_size, int boot_device,
+                          uint32_t kernel_image, uint32_t kernel_size,
+                          const char *cmdline,
+                          uint32_t initrd_image, uint32_t initrd_size,
+                          uint32_t NVRAM_image,
+                          int width, int height, int depth)
 {
-    unsigned char tmp = 0;
-    int i, j;
+    uint16_t crc;
 
-    // Try to match PPC NVRAM
-    nvram_set_string(nvram, 0x00, "QEMU_BIOS", 16);
-    nvram_set_lword(nvram,  0x10, 0x00000001); /* structure v1 */
-    // NVRAM_size, arch not applicable
-    m48t08_write(nvram, 0x2F, nographic & 0xff);
-    nvram_set_lword(nvram,  0x30, RAM_size);
-    m48t08_write(nvram, 0x34, boot_device & 0xff);
-    nvram_set_lword(nvram,  0x38, KERNEL_LOAD_ADDR);
-    nvram_set_lword(nvram,  0x3C, kernel_size);
+    /* Set parameters for Open Hack'Ware BIOS */
+    NVRAM_set_string(nvram, 0x00, "QEMU_BIOS", 16);
+    NVRAM_set_lword(nvram,  0x10, 0x00000002); /* structure v2 */
+    NVRAM_set_word(nvram,   0x14, NVRAM_size);
+    NVRAM_set_string(nvram, 0x20, arch, 16);
+    NVRAM_set_byte(nvram,   0x2f, nographic & 0xff);
+    NVRAM_set_lword(nvram,  0x30, RAM_size);
+    NVRAM_set_byte(nvram,   0x34, boot_device);
+    NVRAM_set_lword(nvram,  0x38, kernel_image);
+    NVRAM_set_lword(nvram,  0x3C, kernel_size);
     if (cmdline) {
-	strcpy(phys_ram_base + CMDLINE_ADDR, cmdline);
-	nvram_set_lword(nvram,  0x40, CMDLINE_ADDR);
-        nvram_set_lword(nvram,  0x44, strlen(cmdline));
+        /* XXX: put the cmdline in NVRAM too ? */
+        strcpy(phys_ram_base + CMDLINE_ADDR, cmdline);
+        NVRAM_set_lword(nvram,  0x40, CMDLINE_ADDR);
+        NVRAM_set_lword(nvram,  0x44, strlen(cmdline));
+    } else {
+        NVRAM_set_lword(nvram,  0x40, 0);
+        NVRAM_set_lword(nvram,  0x44, 0);
     }
-    // initrd_image, initrd_size passed differently
-    nvram_set_word(nvram,   0x54, width);
-    nvram_set_word(nvram,   0x56, height);
-    nvram_set_word(nvram,   0x58, depth);
+    NVRAM_set_lword(nvram,  0x48, initrd_image);
+    NVRAM_set_lword(nvram,  0x4C, initrd_size);
+    NVRAM_set_lword(nvram,  0x50, NVRAM_image);
 
-    // Sun4m specific use
-    i = 0x1fd8;
-    m48t08_write(nvram, i++, 0x01);
-    m48t08_write(nvram, i++, 0x80); /* Sun4m OBP */
-    j = 0;
-    m48t08_write(nvram, i++, macaddr[j++]);
-    m48t08_write(nvram, i++, macaddr[j++]);
-    m48t08_write(nvram, i++, macaddr[j++]);
-    m48t08_write(nvram, i++, macaddr[j++]);
-    m48t08_write(nvram, i++, macaddr[j++]);
-    m48t08_write(nvram, i, macaddr[j]);
+    NVRAM_set_word(nvram,   0x54, width);
+    NVRAM_set_word(nvram,   0x56, height);
+    NVRAM_set_word(nvram,   0x58, depth);
+    crc = NVRAM_compute_crc(nvram, 0x00, 0xF8);
+    NVRAM_set_word(nvram,  0xFC, crc);
 
-    /* Calculate checksum */
-    for (i = 0x1fd8; i < 0x1fe7; i++) {
-	tmp ^= m48t08_read(nvram, i);
-    }
-    m48t08_write(nvram, 0x1fe7, tmp);
+    return 0;
 }
 
 void pic_info()
@@ -157,15 +243,7 @@
 {
 }
 
-void vga_update_display()
-{
-}
-
-void vga_invalidate_display()
-{
-}
-
-void vga_screen_dump(const char *filename)
+void pic_set_irq_new(void *opaque, int irq, int level)
 {
 }
 
@@ -173,6 +251,18 @@
 {
 }
 
+static const int ide_iobase[2] = { 0x1f0, 0x170 }; /* NOTE(review): the ide_* tables are not referenced in the visible hunks - confirm they are needed */
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 14, 15 };
+/* I/O port and IRQ passed to serial_init() for each entry of serial_hds[]. */
+static const int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+static const int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
+/* I/O port and IRQ passed to parallel_init() for each entry of parallel_hds[]. */
+static const int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
+static const int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
+/* Handle returned by fdctrl_init() in sun4u_init(). */
+static fdctrl_t *floppy_controller;
+
 /* Sun4u hardware initialisation */
 static void sun4u_init(int ram_size, int vga_ram_size, int boot_device,
              DisplayState *ds, const char **fd_filename, int snapshot,
@@ -180,21 +270,18 @@
              const char *initrd_filename)
 {
     char buf[1024];
+    m48t59_t *nvram;
     int ret, linux_boot;
     unsigned int i;
-    long vram_size = 0x100000, prom_offset, initrd_size, kernel_size;
+    long prom_offset, initrd_size, kernel_size;
+    PCIBus *pci_bus;
 
     linux_boot = (kernel_filename != NULL);
 
     /* allocate RAM */
     cpu_register_physical_memory(0, ram_size, 0);
 
-    nvram = m48t08_init(PHYS_JJ_EEPROM, PHYS_JJ_EEPROM_SIZE);
-    // Slavio TTYA (base+4, Linux ttyS0) is the first Qemu serial device
-    // Slavio TTYB (base+0, Linux ttyS1) is the second Qemu serial device
-    slavio_serial_init(PHYS_JJ_SER, PHYS_JJ_SER_IRQ, serial_hds[1], serial_hds[0]);
-
-    prom_offset = ram_size + vram_size;
+    prom_offset = ram_size + vga_ram_size;
 
     snprintf(buf, sizeof(buf), "%s/%s", bios_dir, PROM_FILENAMEE);
     ret = load_elf(buf, phys_ram_base + prom_offset);
@@ -211,6 +298,7 @@
                                  prom_offset | IO_MEM_ROM);
 
     kernel_size = 0;
+    initrd_size = 0;
     if (linux_boot) {
         kernel_size = load_elf(kernel_filename, phys_ram_base + KERNEL_LOAD_ADDR);
         if (kernel_size < 0)
@@ -224,7 +312,6 @@
         }
 
         /* load initrd */
-        initrd_size = 0;
         if (initrd_filename) {
             initrd_size = load_image(initrd_filename, phys_ram_base + INITRD_LOAD_ADDR);
             if (initrd_size < 0) {
@@ -244,7 +331,41 @@
 	    }
         }
     }
-    nvram_init(nvram, (uint8_t *)&nd_table[0].macaddr, kernel_cmdline, boot_device, ram_size, kernel_size, graphic_width, graphic_height, graphic_depth);
+    pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE);
+    isa_mem_base = VGA_BASE;
+    vga_initialize(pci_bus, ds, phys_ram_base + ram_size, ram_size, 
+                   vga_ram_size, 0, 0);
+    cpu_register_physical_memory(VGA_BASE, vga_ram_size, ram_size);
+    //pci_cirrus_vga_init(pci_bus, ds, phys_ram_base + ram_size, ram_size, vga_ram_size);
+
+    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+        if (serial_hds[i]) {
+            serial_init(serial_io[i], serial_irq[i], serial_hds[i]);
+        }
+    }
+
+    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
+        if (parallel_hds[i]) {
+            parallel_init(parallel_io[i], parallel_irq[i], parallel_hds[i]);
+        }
+    }
+
+    for(i = 0; i < nb_nics; i++) {
+	pci_ne2000_init(pci_bus, &nd_table[i]);
+    }
+
+    pci_cmd646_ide_init(pci_bus, bs_table, 1);
+    kbd_init();
+    floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
+    nvram = m48t59_init(8, 0, 0x0074, NVRAM_SIZE);
+    sun4u_NVRAM_set_params(nvram, NVRAM_SIZE, "Sun4u", ram_size, boot_device,
+                         KERNEL_LOAD_ADDR, kernel_size,
+                         kernel_cmdline,
+                         INITRD_LOAD_ADDR, initrd_size,
+                         /* XXX: need an option to load a NVRAM image */
+                         0,
+                         graphic_width, graphic_height, graphic_depth);
+
 }
 
 QEMUMachine sun4u_machine = {
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 439c0b0..17072a7 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1058,6 +1058,19 @@
 Use the executable @file{qemu-system-sparc64} to simulate a Sun4u machine.
 The emulator is not usable for anything yet.
 
+QEMU emulates the following sun4u peripherals:
+
+@itemize @minus
+@item
+UltraSparc IIi APB PCI Bridge 
+@item
+PCI VGA compatible card with VESA Bochs Extensions
+@item
+Non Volatile RAM M48T59
+@item
+PC-compatible serial ports
+@end itemize
+
 @chapter MIPS System emulator invocation
 
 Use the executable @file{qemu-system-mips} to simulate a MIPS machine.
diff --git a/qemu-tech.texi b/qemu-tech.texi
index 379cbad..95d1787 100644
--- a/qemu-tech.texi
+++ b/qemu-tech.texi
@@ -163,7 +163,6 @@
 @item Atomic instructions are not correctly implemented.
 
 @item Sparc64 emulators are not usable for anything yet.
-Address space is limited to first 4 gigabytes.
 
 @end itemize
 
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 2eb900d..999d5d7 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -6,9 +6,11 @@
 #if !defined(TARGET_SPARC64)
 #define TARGET_LONG_BITS 32
 #define TARGET_FPREGS 32
+#define TARGET_PAGE_BITS 12 /* 4k */
 #else
 #define TARGET_LONG_BITS 64
 #define TARGET_FPREGS 64
+#define TARGET_PAGE_BITS 12 /* XXX */
 #endif
 #define TARGET_FPREG_T float
 
@@ -35,6 +37,7 @@
 #define TT_TRAP     0x80
 #else
 #define TT_TFAULT   0x08
+#define TT_TMISS    0x09
 #define TT_ILL_INSN 0x10
 #define TT_PRIV_INSN 0x11
 #define TT_NFPU_INSN 0x20
@@ -42,6 +45,9 @@
 #define TT_CLRWIN   0x24
 #define TT_DIV_ZERO 0x28
 #define TT_DFAULT   0x30
+#define TT_DMISS    0x31
+#define TT_DPROT    0x32
+#define TT_PRIV_ACT 0x37
 #define TT_EXTINT   0x40
 #define TT_SPILL    0x80
 #define TT_FILL     0xc0
@@ -65,10 +71,14 @@
 #define TBR_BASE_MASK 0xfffff000
 
 #if defined(TARGET_SPARC64)
+#define PS_IG    (1<<11)
+#define PS_MG    (1<<10)
+#define PS_RED   (1<<5)
 #define PS_PEF   (1<<4)
 #define PS_AM    (1<<3)
 #define PS_PRIV  (1<<2)
 #define PS_IE    (1<<1)
+#define PS_AG    (1<<0)
 #endif
 
 /* Fcc */
@@ -166,7 +176,7 @@
        context) */
     unsigned long mem_write_pc; /* host pc at which the memory was
                                    written */
-    unsigned long mem_write_vaddr; /* target virtual addr at which the
+    target_ulong mem_write_vaddr; /* target virtual addr at which the
                                       memory was written */
     /* 0 = kernel, 1 = user (may have 2 = kernel code, 3 = user code ?) */
     CPUTLBEntry tlb_read[2][CPU_TLB_SIZE];
@@ -201,11 +211,13 @@
     uint32_t pstate;
     uint32_t tl;
     uint32_t cansave, canrestore, otherwin, wstate, cleanwin;
-    target_ulong agregs[8]; /* alternate general registers */
-    target_ulong igregs[8]; /* interrupt general registers */
-    target_ulong mgregs[8]; /* mmu general registers */
+    uint64_t agregs[8]; /* alternate general registers */
+    uint64_t bgregs[8]; /* backup for normal global registers */
+    uint64_t igregs[8]; /* interrupt general registers */
+    uint64_t mgregs[8]; /* mmu general registers */
     uint64_t version;
     uint64_t fprs;
+    uint64_t tick_cmpr, stick_cmpr;
 #endif
 #if !defined(TARGET_SPARC64) && !defined(reg_T2)
     target_ulong t2;
@@ -275,7 +287,6 @@
 struct siginfo;
 int cpu_sparc_signal_handler(int hostsignum, struct siginfo *info, void *puc);
 
-#define TARGET_PAGE_BITS 12 /* 4k */
 #include "cpu-all.h"
 
 #endif
diff --git a/target-sparc/exec.h b/target-sparc/exec.h
index c5b73fe..942b811 100644
--- a/target-sparc/exec.h
+++ b/target-sparc/exec.h
@@ -65,6 +65,9 @@
 void do_fcmps_fcc3(void);
 void do_fcmpd_fcc3(void);
 void do_popc();
+void do_wrpstate();
+void do_done();
+void do_retry();
 #endif
 void do_ldd_kernel(target_ulong addr);
 void do_ldd_user(target_ulong addr);
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index be3d6b9..78a033b 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -1,7 +1,7 @@
 /*
  *  sparc helpers
  * 
- *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2003-2005 Fabrice Bellard
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -28,7 +28,6 @@
 #include "cpu.h"
 #include "exec-all.h"
 
-//#define DEBUG_PCALL
 //#define DEBUG_MMU
 
 /* Sparc MMU emulation */
@@ -62,6 +61,9 @@
 #else
 
 #ifndef TARGET_SPARC64
+/*
+ * Sparc V8 Reference MMU (SRMMU)
+ */
 static const int access_table[8][8] = {
     { 0, 0, 0, 0, 2, 0, 3, 3 },
     { 0, 0, 0, 0, 2, 0, 0, 0 },
@@ -229,6 +231,9 @@
     }
 }
 #else
+/*
+ * UltraSparc IIi I/DMMUs
+ */
 static int get_physical_address_data(CPUState *env, target_phys_addr_t *physical, int *prot,
 			  int *access_index, target_ulong address, int rw,
 			  int is_user)
@@ -237,46 +242,55 @@
     unsigned int i;
 
     if ((env->lsu & DMMU_E) == 0) { /* DMMU disabled */
-	*physical = address & 0xffffffff;
+	*physical = address;
 	*prot = PAGE_READ | PAGE_WRITE;
         return 0;
     }
 
     for (i = 0; i < 64; i++) {
-	if ((env->dtlb_tte[i] & 0x8000000000000000ULL) != 0) {
-	    switch (env->dtlb_tte[i] >> 60) {
-	    default:
-	    case 0x4: // 8k
-		mask = 0xffffffffffffe000ULL;
-		break;
-	    case 0x5: // 64k
-		mask = 0xffffffffffff0000ULL;
-		break;
-	    case 0x6: // 512k
-		mask = 0xfffffffffff80000ULL;
-		break;
-	    case 0x7: // 4M
-		mask = 0xffffffffffc00000ULL;
-		break;
+	switch ((env->dtlb_tte[i] >> 61) & 3) {
+	default:
+	case 0x0: // 8k
+	    mask = 0xffffffffffffe000ULL;
+	    break;
+	case 0x1: // 64k
+	    mask = 0xffffffffffff0000ULL;
+	    break;
+	case 0x2: // 512k
+	    mask = 0xfffffffffff80000ULL;
+	    break;
+	case 0x3: // 4M
+	    mask = 0xffffffffffc00000ULL;
+	    break;
+	}
+	// ctx match, vaddr match?
+	if (env->dmmuregs[1] == (env->dtlb_tag[i] & 0x1fff) &&
+	    (address & mask) == (env->dtlb_tag[i] & ~0x1fffULL)) {
+	    // valid, access ok?
+	    if ((env->dtlb_tte[i] & 0x8000000000000000ULL) == 0 ||
+		((env->dtlb_tte[i] & 0x4) && is_user) ||
+		(!(env->dtlb_tte[i] & 0x2) && (rw == 1))) {
+		if (env->dmmuregs[3]) /* Fault status register */
+		    env->dmmuregs[3] = 2; /* overflow (not read before another fault) */
+		env->dmmuregs[3] |= (is_user << 3) | ((rw == 1) << 2) | 1;
+		env->dmmuregs[4] = address; /* Fault address register */
+		env->exception_index = TT_DFAULT;
+#ifdef DEBUG_MMU
+		printf("DFAULT at 0x%llx\n", address);
+#endif
+		return 1;
 	    }
-	    // ctx match, vaddr match?
-	    if (env->dmmuregs[1] == (env->dtlb_tag[i] & 0x1fff) &&
-		(address & mask) == (env->dtlb_tag[i] & ~0x1fffULL)) {
-		// access ok?
-		if (((env->dtlb_tte[i] & 0x4) && !(env->pstate & PS_PRIV)) ||
-		    (!(env->dtlb_tte[i] & 0x2) && (rw == 1))) {
-		    env->exception_index = TT_DFAULT;
-		    return 1;
-		}
-		*physical = env->dtlb_tte[i] & 0xffffe000;
-		*prot = PAGE_READ;
-		if (env->dtlb_tte[i] & 0x2)
-		    *prot |= PAGE_WRITE;
-		return 0;
-	    }
+	    *physical = (env->dtlb_tte[i] & mask & 0x1fffffff000ULL) + (address & ~mask & 0x1fffffff000ULL);
+	    *prot = PAGE_READ;
+	    if (env->dtlb_tte[i] & 0x2)
+		*prot |= PAGE_WRITE;
+	    return 0;
 	}
     }
-    env->exception_index = TT_DFAULT;
+#ifdef DEBUG_MMU
+    printf("DMISS at 0x%llx\n", address);
+#endif
+    env->exception_index = TT_DMISS;
     return 1;
 }
 
@@ -288,42 +302,51 @@
     unsigned int i;
 
     if ((env->lsu & IMMU_E) == 0) { /* IMMU disabled */
-	*physical = address & 0xffffffff;
+	*physical = address;
 	*prot = PAGE_READ;
         return 0;
     }
+
     for (i = 0; i < 64; i++) {
-	if ((env->itlb_tte[i] & 0x8000000000000000ULL) != 0) {
-	    switch (env->itlb_tte[i] >> 60) {
-	    default:
-	    case 0x4: // 8k
-		mask = 0xffffffffffffe000ULL;
+	switch ((env->itlb_tte[i] >> 61) & 3) {
+	default:
+	case 0x0: // 8k
+	    mask = 0xffffffffffffe000ULL;
+	    break;
+	case 0x1: // 64k
+	    mask = 0xffffffffffff0000ULL;
+	    break;
+	case 0x2: // 512k
+	    mask = 0xfffffffffff80000ULL;
+	    break;
+	case 0x3: // 4M
+	    mask = 0xffffffffffc00000ULL;
 		break;
-	    case 0x5: // 64k
-		mask = 0xffffffffffff0000ULL;
-		break;
-	    case 0x6: // 512k
-		mask = 0xfffffffffff80000ULL;
-		break;
-	    case 0x7: // 4M
-		mask = 0xffffffffffc00000ULL;
-		break;
+	}
+	// ctx match, vaddr match?
+	if (env->dmmuregs[1] == (env->itlb_tag[i] & 0x1fff) &&
+	    (address & mask) == (env->itlb_tag[i] & ~0x1fffULL)) {
+	    // valid, access ok?
+	    if ((env->itlb_tte[i] & 0x8000000000000000ULL) == 0 ||
+		((env->itlb_tte[i] & 0x4) && is_user)) {
+		if (env->immuregs[3]) /* Fault status register */
+		    env->immuregs[3] = 2; /* overflow (not read before another fault) */
+		env->immuregs[3] |= (is_user << 3) | 1;
+		env->exception_index = TT_TFAULT;
+#ifdef DEBUG_MMU
+		printf("TFAULT at 0x%llx\n", address);
+#endif
+		return 1;
 	    }
-	    // ctx match, vaddr match?
-	    if (env->immuregs[1] == (env->itlb_tag[i] & 0x1fff) &&
-		(address & mask) == (env->itlb_tag[i] & ~0x1fffULL)) {
-		// access ok?
-		if ((env->itlb_tte[i] & 0x4) && !(env->pstate & PS_PRIV)) {
-		    env->exception_index = TT_TFAULT;
-		    return 1;
-		}
-		*physical = env->itlb_tte[i] & 0xffffe000;
-		*prot = PAGE_READ;
-		return 0;
-	    }
+	    *physical = (env->itlb_tte[i] & mask & 0x1fffffff000ULL) + (address & ~mask & 0x1fffffff000ULL);
+	    *prot = PAGE_READ;
+	    return 0;
 	}
     }
-    env->exception_index = TT_TFAULT;
+#ifdef DEBUG_MMU
+    printf("TMISS at 0x%llx\n", address);
+#endif
+    env->exception_index = TT_TMISS;
     return 1;
 }
 
@@ -341,15 +364,17 @@
 int cpu_sparc_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
                               int is_user, int is_softmmu)
 {
-    target_ulong virt_addr;
+    target_ulong virt_addr, vaddr;
     target_phys_addr_t paddr;
-    unsigned long vaddr;
     int error_code = 0, prot, ret = 0, access_index;
 
     error_code = get_physical_address(env, &paddr, &prot, &access_index, address, rw, is_user);
     if (error_code == 0) {
 	virt_addr = address & TARGET_PAGE_MASK;
 	vaddr = virt_addr + ((address & TARGET_PAGE_MASK) & (TARGET_PAGE_SIZE - 1));
+#ifdef DEBUG_MMU
+	printf("Translate at 0x%llx -> 0x%llx, vaddr 0x%llx\n", address, paddr, vaddr);
+#endif
 	ret = tlb_set_page(env, vaddr, paddr, prot, is_user, is_softmmu);
 	return ret;
     }
@@ -471,4 +496,77 @@
     printf("MMU dump ends\n");
 }
 #endif
+#else
+#ifdef DEBUG_MMU
+void dump_mmu(CPUState *env)
+{
+    unsigned int i;
+    const char *mask;
+
+    printf("MMU contexts: Primary: %lld, Secondary: %lld\n", env->dmmuregs[1], env->dmmuregs[2]);
+    if ((env->lsu & DMMU_E) == 0) {
+	printf("DMMU disabled\n");
+    } else {
+	printf("DMMU dump:\n");
+	for (i = 0; i < 64; i++) {
+	    switch ((env->dtlb_tte[i] >> 61) & 3) {
+	    default:
+	    case 0x0:
+		mask = "  8k";
+		break;
+	    case 0x1:
+		mask = " 64k";
+		break;
+	    case 0x2:
+		mask = "512k";
+		break;
+	    case 0x3:
+		mask = "  4M";
+		break;
+	    }
+	    if ((env->dtlb_tte[i] & 0x8000000000000000ULL) != 0) {
+		printf("VA: " TARGET_FMT_lx ", PA: " TARGET_FMT_lx ", %s, %s, %s, %s, ctx %lld\n",
+		       env->dtlb_tag[i] & ~0x1fffULL,
+		       env->dtlb_tte[i] & 0x1ffffffe000ULL,
+		       mask,
+		       env->dtlb_tte[i] & 0x4? "priv": "user",
+		       env->dtlb_tte[i] & 0x2? "RW": "RO",
+		       env->dtlb_tte[i] & 0x40? "locked": "unlocked",
+		       env->dtlb_tag[i] & 0x1fffULL);
+	    }
+	}
+    }
+    if ((env->lsu & IMMU_E) == 0) {
+	printf("IMMU disabled\n");
+    } else {
+	printf("IMMU dump:\n");
+	for (i = 0; i < 64; i++) {
+	    switch ((env->itlb_tte[i] >> 61) & 3) {
+	    default:
+	    case 0x0:
+		mask = "  8k";
+		break;
+	    case 0x1:
+		mask = " 64k";
+		break;
+	    case 0x2:
+		mask = "512k";
+		break;
+	    case 0x3:
+		mask = "  4M";
+		break;
+	    }
+	    if ((env->itlb_tte[i] & 0x8000000000000000ULL) != 0) {
+		printf("VA: " TARGET_FMT_lx ", PA: " TARGET_FMT_lx ", %s, %s, %s, ctx %lld\n",
+		       env->itlb_tag[i] & ~0x1fffULL,
+		       env->itlb_tte[i] & 0x1ffffffe000ULL,
+		       mask,
+		       env->itlb_tte[i] & 0x4? "priv": "user",
+		       env->itlb_tte[i] & 0x40? "locked": "unlocked",
+		       env->itlb_tag[i] & 0x1fffULL);
+	    }
+	}
+    }
+}
+#endif
 #endif
diff --git a/target-sparc/op.c b/target-sparc/op.c
index 86c45c7..f8b491a 100644
--- a/target-sparc/op.c
+++ b/target-sparc/op.c
@@ -267,15 +267,6 @@
 #endif
 
 #ifdef TARGET_SPARC64
-#undef JUMP_TB
-#define JUMP_TB(opname, tbparam, n, eip)	\
-    do {					\
-	GOTO_TB(opname, tbparam, n);		\
-	T0 = (long)(tbparam) + (n);		\
-	env->pc = (eip) & 0xffffffff;		\
-	EXIT_TB();				\
-    } while (0)
-
 #ifdef WORDS_BIGENDIAN
 typedef union UREG64 {
     struct { uint16_t v3, v2, v1, v0; } w;
@@ -388,7 +379,7 @@
 	env->psr |= PSR_ZERO;
     if ((int32_t) T0 < 0)
 	env->psr |= PSR_NEG;
-    if ((T0 & 0xffffffff) < (src1 & 0xffffffff))
+    if ((src1 & 0xffffffff) < (T1 & 0xffffffff))
 	env->psr |= PSR_CARRY;
     if ((((src1 & 0xffffffff) ^ (T1 & 0xffffffff) ^ -1) &
 	 ((src1 & 0xffffffff) ^ (T0 & 0xffffffff))) & (1 << 31))
@@ -433,7 +424,7 @@
 	env->psr |= PSR_ZERO;
     if ((int32_t) T0 < 0)
 	env->psr |= PSR_NEG;
-    if ((T0 & 0xffffffff) < (src1 & 0xffffffff))
+    if ((src1 & 0xffffffff) < (T1 & 0xffffffff))
 	env->psr |= PSR_CARRY;
     if ((((src1 & 0xffffffff) ^ (T1 & 0xffffffff) ^ -1) &
 	 ((src1 & 0xffffffff) ^ (T0 & 0xffffffff))) & (1 << 31))
@@ -478,7 +469,7 @@
 	env->psr |= PSR_ZERO;
     if ((int32_t) T0 < 0)
 	env->psr |= PSR_NEG;
-    if ((T0 & 0xffffffff) < (src1 & 0xffffffff))
+    if ((src1 & 0xffffffff) < (T1 & 0xffffffff))
 	env->psr |= PSR_CARRY;
     if ((((src1 & 0xffffffff) ^ (T1 & 0xffffffff)) &
 	 ((src1 & 0xffffffff) ^ (T0 & 0xffffffff))) & (1 << 31))
@@ -523,7 +514,7 @@
 	env->psr |= PSR_ZERO;
     if ((int32_t) T0 < 0)
 	env->psr |= PSR_NEG;
-    if ((T0 & 0xffffffff) < (src1 & 0xffffffff))
+    if ((src1 & 0xffffffff) < (T1 & 0xffffffff))
 	env->psr |= PSR_CARRY;
     if ((((src1 & 0xffffffff) ^ (T1 & 0xffffffff)) &
 	 ((src1 & 0xffffffff) ^ (T0 & 0xffffffff))) & (1 << 31))
@@ -585,7 +576,11 @@
 {
     uint64_t res;
     res = (uint64_t) T0 * (uint64_t) T1;
+#ifdef TARGET_SPARC64
+    T0 = res;
+#else
     T0 = res & 0xffffffff;
+#endif
     env->y = res >> 32;
 }
 
@@ -593,7 +588,11 @@
 {
     uint64_t res;
     res = (int64_t) ((int32_t) T0) * (int64_t) ((int32_t) T1);
+#ifdef TARGET_SPARC64
+    T0 = res;
+#else
     T0 = res & 0xffffffff;
+#endif
     env->y = res >> 32;
 }
 
@@ -902,7 +901,7 @@
 
 void OPPROTO op_wrpstate(void)
 {
-    env->pstate = T0 & 0x1f;
+    do_wrpstate();
 }
 
 // CWP handling is reversed in V9, but we still use the V8 register
@@ -1201,12 +1200,12 @@
 #ifdef TARGET_SPARC64
 void OPPROTO op_eval_brz(void)
 {
-    T2 = T0;
+    T2 = (T0 == 0);
 }
 
 void OPPROTO op_eval_brnz(void)
 {
-    T2 = !T0;
+    T2 = (T0 != 0);
 }
 
 void OPPROTO op_eval_brlz(void)
@@ -1266,43 +1265,32 @@
     env->npc = env->npc + 4;
 }
 
-void OPPROTO op_branch(void)
+void OPPROTO op_goto_tb0(void)
 {
-    env->npc = (uint32_t)PARAM3; /* XXX: optimize */
-    JUMP_TB(op_branch, PARAM1, 0, PARAM2);
+    GOTO_TB(op_goto_tb0, PARAM1, 0);
 }
 
-void OPPROTO op_branch2(void)
+void OPPROTO op_goto_tb1(void)
 {
-    if (T2) {
-        env->npc = (uint32_t)PARAM2 + 4; 
-        JUMP_TB(op_branch2, PARAM1, 0, PARAM2);
-    } else {
-        env->npc = (uint32_t)PARAM3 + 4; 
-        JUMP_TB(op_branch2, PARAM1, 1, PARAM3);
-    }
+    GOTO_TB(op_goto_tb1, PARAM1, 1);
+}
+
+void OPPROTO op_jmp_label(void)
+{
+    GOTO_LABEL_PARAM(1);
+}
+
+void OPPROTO op_jnz_T2_label(void)
+{
+    if (T2)
+        GOTO_LABEL_PARAM(1);
     FORCE_RET();
 }
 
-void OPPROTO op_branch_a(void)
+void OPPROTO op_jz_T2_label(void)
 {
-    if (T2) {
-	env->npc = (uint32_t)PARAM2; /* XXX: optimize */
-        JUMP_TB(op_branch_a, PARAM1, 0, PARAM3);
-    } else {
-	env->npc = (uint32_t)PARAM3 + 8; /* XXX: optimize */
-        JUMP_TB(op_branch_a, PARAM1, 1, PARAM3 + 4);
-    }
-    FORCE_RET();
-}
-
-void OPPROTO op_generic_branch(void)
-{
-    if (T2) {
-	env->npc = (uint32_t)PARAM1;
-    } else {
-	env->npc = (uint32_t)PARAM2;
-    }
+    if (!T2)
+        GOTO_LABEL_PARAM(1);
     FORCE_RET();
 }
 
@@ -1547,18 +1535,12 @@
 
 void OPPROTO op_done(void)
 {
-    env->pc = env->tnpc[env->tl];
-    env->npc = env->tnpc[env->tl] + 4;
-    env->pstate = env->tstate[env->tl];
-    env->tl--;
+    do_done();
 }
 
 void OPPROTO op_retry(void)
 {
-    env->pc = env->tpc[env->tl];
-    env->npc = env->tnpc[env->tl];
-    env->pstate = env->tstate[env->tl];
-    env->tl--;
+    do_retry();
 }
 
 void OPPROTO op_sir(void)
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index 15ce853..468bbb6 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -1,5 +1,6 @@
 #include "exec.h"
 
+//#define DEBUG_PCALL
 //#define DEBUG_MMU
 
 void raise_exception(int tt)
@@ -223,7 +224,7 @@
 #ifndef TARGET_SPARC64
 void helper_ld_asi(int asi, int size, int sign)
 {
-    uint32_t ret;
+    uint32_t ret = 0;
 
     switch (asi) {
     case 3: /* MMU probe */
@@ -299,7 +300,8 @@
 	}
     case 4: /* write MMU regs */
 	{
-	    int reg = (T0 >> 8) & 0xf, oldreg;
+	    int reg = (T0 >> 8) & 0xf;
+	    uint32_t oldreg;
 	    
 	    oldreg = env->mmuregs[reg];
             switch(reg) {
@@ -339,7 +341,7 @@
 	    // value (T1) = src
 	    // address (T0) = dst
 	    // copy 32 bytes
-	    int src = T1, dst = T0;
+	    uint32_t src = T1, dst = T0;
 	    uint8_t temp[32];
 	    
 	    tswap32s(&src);
@@ -353,7 +355,8 @@
 	    // value (T1, T2)
 	    // address (T0) = dst
 	    // fill 32 bytes
-	    int i, dst = T0;
+	    int i;
+	    uint32_t dst = T0;
 	    uint64_t val;
 	    
 	    val = (((uint64_t)T1) << 32) | T2;
@@ -366,7 +369,7 @@
 	return;
     case 0x20 ... 0x2f: /* MMU passthrough */
 	{
-	    int temp = T1;
+	    uint32_t temp = T1;
 	    if (size == 4)
 		tswap32s(&temp);
 	    else if (size == 2)
@@ -383,10 +386,10 @@
 
 void helper_ld_asi(int asi, int size, int sign)
 {
-    uint64_t ret;
+    uint64_t ret = 0;
 
     if (asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-	raise_exception(TT_PRIV_INSN);
+	raise_exception(TT_PRIV_ACT);
 
     switch (asi) {
     case 0x14: // Bypass
@@ -401,8 +404,23 @@
 		tswap16s((uint16_t *)&ret);
 	    break;
 	}
+    case 0x04: // Nucleus
+    case 0x0c: // Nucleus Little Endian (LE)
+    case 0x10: // As if user primary
+    case 0x11: // As if user secondary
+    case 0x18: // As if user primary LE
+    case 0x19: // As if user secondary LE
     case 0x1c: // Bypass LE
     case 0x1d: // Bypass, non-cacheable LE
+    case 0x24: // Nucleus quad LDD 128 bit atomic
+    case 0x2c: // Nucleus quad LDD 128 bit atomic LE
+    case 0x4a: // UPA config
+    case 0x82: // Primary no-fault
+    case 0x83: // Secondary no-fault
+    case 0x88: // Primary LE
+    case 0x89: // Secondary LE
+    case 0x8a: // Primary no-fault LE
+    case 0x8b: // Secondary no-fault LE
 	// XXX
 	break;
     case 0x45: // LSU
@@ -418,8 +436,22 @@
     case 0x51: // I-MMU 8k TSB pointer
     case 0x52: // I-MMU 64k TSB pointer
     case 0x55: // I-MMU data access
-    case 0x56: // I-MMU tag read
+	// XXX
 	break;
+    case 0x56: // I-MMU tag read
+	{
+	    unsigned int i;
+	    
+	    for (i = 0; i < 64; i++) {
+		// Valid, ctx match, vaddr match
+		if ((env->itlb_tte[i] & 0x8000000000000000ULL) != 0 &&
+		    env->itlb_tag[i] == T0) {
+		    ret = env->itlb_tag[i];
+		    break;
+		}
+	    }
+	    break;
+	}
     case 0x58: // D-MMU regs
 	{
 	    int reg = (T0 >> 3) & 0xf;
@@ -427,16 +459,34 @@
 	    ret = env->dmmuregs[reg];
 	    break;
 	}
+    case 0x5e: // D-MMU tag read
+	{
+	    unsigned int i;
+	    
+	    for (i = 0; i < 64; i++) {
+		// Valid, ctx match, vaddr match
+		if ((env->dtlb_tte[i] & 0x8000000000000000ULL) != 0 &&
+		    env->dtlb_tag[i] == T0) {
+		    ret = env->dtlb_tag[i];
+		    break;
+		}
+	    }
+	    break;
+	}
     case 0x59: // D-MMU 8k TSB pointer
     case 0x5a: // D-MMU 64k TSB pointer
     case 0x5b: // D-MMU data pointer
     case 0x5d: // D-MMU data access
-    case 0x5e: // D-MMU tag read
+    case 0x48: // Interrupt dispatch, RO
+    case 0x49: // Interrupt data receive
+    case 0x7f: // Incoming interrupt vector, RO
+	// XXX
 	break;
     case 0x54: // I-MMU data in, WO
     case 0x57: // I-MMU demap, WO
     case 0x5c: // D-MMU data in, WO
     case 0x5f: // D-MMU demap, WO
+    case 0x77: // Interrupt vector, WO
     default:
 	ret = 0;
 	break;
@@ -447,7 +497,7 @@
 void helper_st_asi(int asi, int size, int sign)
 {
     if (asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-	raise_exception(TT_PRIV_INSN);
+	raise_exception(TT_PRIV_ACT);
 
     switch(asi) {
     case 0x14: // Bypass
@@ -463,8 +513,19 @@
 	    cpu_physical_memory_write(T0, (void *) &temp, size);
 	}
 	return;
+    case 0x04: // Nucleus
+    case 0x0c: // Nucleus Little Endian (LE)
+    case 0x10: // As if user primary
+    case 0x11: // As if user secondary
+    case 0x18: // As if user primary LE
+    case 0x19: // As if user secondary LE
     case 0x1c: // Bypass LE
     case 0x1d: // Bypass, non-cacheable LE
+    case 0x24: // Nucleus quad LDD 128 bit atomic
+    case 0x2c: // Nucleus quad LDD 128 bit atomic LE
+    case 0x4a: // UPA config
+    case 0x88: // Primary LE
+    case 0x89: // Secondary LE
 	// XXX
 	return;
     case 0x45: // LSU
@@ -475,8 +536,13 @@
 	    env->lsu = T1 & (DMMU_E | IMMU_E);
 	    // Mappings generated during D/I MMU disabled mode are
 	    // invalid in normal mode
-	    if (oldreg != env->lsu)
+	    if (oldreg != env->lsu) {
+#ifdef DEBUG_MMU
+                printf("LSU change: 0x%llx -> 0x%llx\n", oldreg, env->lsu);
+		dump_mmu(env);
+#endif
 		tlb_flush(env, 1);
+	    }
 	    return;
 	}
     case 0x50: // I-MMU regs
@@ -506,7 +572,7 @@
 	    env->immuregs[reg] = T1;
 #ifdef DEBUG_MMU
             if (oldreg != env->immuregs[reg]) {
-                printf("mmu change reg[%d]: 0x%08x -> 0x%08x\n", reg, oldreg, env->immuregs[reg]);
+                printf("mmu change reg[%d]: 0x%08llx -> 0x%08llx\n", reg, oldreg, env->immuregs[reg]);
             }
 	    dump_mmu(env);
 #endif
@@ -544,6 +610,7 @@
 	    return;
 	}
     case 0x57: // I-MMU demap
+	// XXX
 	return;
     case 0x58: // D-MMU regs
 	{
@@ -574,7 +641,7 @@
 	    env->dmmuregs[reg] = T1;
 #ifdef DEBUG_MMU
             if (oldreg != env->dmmuregs[reg]) {
-                printf("mmu change reg[%d]: 0x%08x -> 0x%08x\n", reg, oldreg, env->dmmuregs[reg]);
+                printf("mmu change reg[%d]: 0x%08llx -> 0x%08llx\n", reg, oldreg, env->dmmuregs[reg]);
             }
 	    dump_mmu(env);
 #endif
@@ -612,6 +679,8 @@
 	    return;
 	}
     case 0x5f: // D-MMU demap
+    case 0x49: // Interrupt data receive
+	// XXX
 	return;
     case 0x51: // I-MMU 8k TSB pointer, RO
     case 0x52: // I-MMU 64k TSB pointer, RO
@@ -620,6 +689,12 @@
     case 0x5a: // D-MMU 64k TSB pointer, RO
     case 0x5b: // D-MMU data pointer, RO
     case 0x5e: // D-MMU tag read, RO
+    case 0x48: // Interrupt dispatch, RO
+    case 0x7f: // Incoming interrupt vector, RO
+    case 0x82: // Primary no-fault, RO
+    case 0x83: // Secondary no-fault, RO
+    case 0x8a: // Primary no-fault LE, RO
+    case 0x8b: // Secondary no-fault LE, RO
     default:
 	return;
     }
@@ -704,6 +779,61 @@
     T0 = (T0 & 0x0000ffff0000ffffULL) + ((T0 >> 16) & 0x0000ffff0000ffffULL);
     T0 = (T0 & 0x00000000ffffffffULL) + ((T0 >> 32) & 0x00000000ffffffffULL);
 }
+
+static inline uint64_t *get_gregset(uint64_t pstate)
+{
+    switch (pstate) {
+    default:
+    case 0:
+	return env->bgregs;
+    case PS_AG:
+	return env->agregs;
+    case PS_MG:
+	return env->mgregs;
+    case PS_IG:
+	return env->igregs;
+    }
+}
+
+void do_wrpstate()
+{
+    uint64_t new_pstate, pstate_regs, new_pstate_regs;
+    uint64_t *src, *dst;
+
+    new_pstate = T0 & 0xf3f;
+    pstate_regs = env->pstate & 0xc01;
+    new_pstate_regs = new_pstate & 0xc01;
+    if (new_pstate_regs != pstate_regs) {
+	// Switch global register bank
+	src = get_gregset(new_pstate_regs);
+	dst = get_gregset(pstate_regs);
+	memcpy32(dst, env->gregs);
+	memcpy32(env->gregs, src);
+    }
+    env->pstate = new_pstate;
+}
+
+void do_done(void)
+{
+    env->tl--;
+    env->pc = env->tnpc[env->tl];
+    env->npc = env->tnpc[env->tl] + 4;
+    PUT_CCR(env, env->tstate[env->tl] >> 32);
+    env->asi = (env->tstate[env->tl] >> 24) & 0xff;
+    env->pstate = (env->tstate[env->tl] >> 8) & 0xfff;
+    set_cwp(env->tstate[env->tl] & 0xff);
+}
+
+void do_retry(void)
+{
+    env->tl--;
+    env->pc = env->tpc[env->tl];
+    env->npc = env->tnpc[env->tl];
+    PUT_CCR(env, env->tstate[env->tl] >> 32);
+    env->asi = (env->tstate[env->tl] >> 24) & 0xff;
+    env->pstate = (env->tstate[env->tl] >> 8) & 0xfff;
+    set_cwp(env->tstate[env->tl] & 0xff);
+}
 #endif
 
 void set_cwp(int new_cwp)
@@ -744,7 +874,7 @@
 #ifdef DEBUG_PCALL
     if (loglevel & CPU_LOG_INT) {
 	static int count;
-	fprintf(logfile, "%6d: v=%02x pc=%08x npc=%08x SP=%08x\n",
+	fprintf(logfile, "%6d: v=%04x pc=%016llx npc=%016llx SP=%016llx\n",
                 count, intno,
                 env->pc,
                 env->npc, env->regwptr[6]);
@@ -766,8 +896,8 @@
     }
 #endif
 #if !defined(CONFIG_USER_ONLY) 
-    if (env->pstate & PS_IE) {
-        cpu_abort(cpu_single_env, "Trap 0x%02x while interrupts disabled, Error state", env->exception_index);
+    if (env->tl == MAXTL) {
+        cpu_abort(cpu_single_env, "Trap 0x%04x while trap level is MAXTL, Error state", env->exception_index);
 	return;
     }
 #endif
@@ -776,8 +906,16 @@
     env->tpc[env->tl] = env->pc;
     env->tnpc[env->tl] = env->npc;
     env->tt[env->tl] = intno;
-    env->tbr = env->tbr | (env->tl > 1) ? 1 << 14 : 0 | (intno << 4);
-    env->tl++;
+    env->pstate = PS_PEF | PS_PRIV | PS_AG;
+    env->tbr &= ~0x7fffULL;
+    env->tbr |= ((env->tl > 1) ? 1 << 14 : 0) | (intno << 5);
+    if (env->tl < MAXTL - 1) {
+	env->tl++;
+    } else {
+	env->pstate |= PS_RED;
+	if (env->tl != MAXTL)
+	    env->tl++;
+    }
     env->pc = env->tbr;
     env->npc = env->pc + 4;
     env->exception_index = 0;
diff --git a/target-sparc/op_mem.h b/target-sparc/op_mem.h
index 2407c15..f5dbd26 100644
--- a/target-sparc/op_mem.h
+++ b/target-sparc/op_mem.h
@@ -105,20 +105,10 @@
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)glue(ldl, MEMSUFFIX)(T0);
+    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
-void OPPROTO glue(op_ldx, MEMSUFFIX)(void)
-{
-    // XXX
-    T1 = (uint64_t)glue(ldl, MEMSUFFIX)(T0) << 32;
-    T1 |= glue(ldl, MEMSUFFIX)(T0);
-}
-
-void OPPROTO glue(op_stx, MEMSUFFIX)(void)
-{
-    glue(stl, MEMSUFFIX)(T0, T1 >> 32);
-    glue(stl, MEMSUFFIX)(T0, T1 & 0xffffffff);
-}
+SPARC_LD_OP(ldx, ldq);
+SPARC_ST_OP(stx, stq);
 #endif
 #undef MEMSUFFIX
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index e1c0272..c2ba2e3 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -86,6 +86,12 @@
 #define DFPREG(r) (r)
 #endif
 
+#ifdef USE_DIRECT_JUMP
+#define TBPARAM(x)
+#else
+#define TBPARAM(x) (long)(x)
+#endif
+
 static int sign_extend(int x, int len)
 {
     len = 32 - len;
@@ -462,7 +468,7 @@
 
 static inline void gen_movl_imm_TN(int reg, uint32_t imm)
 {
-    gen_op_movl_TN_im[reg] (imm);
+    gen_op_movl_TN_im[reg](imm);
 }
 
 static inline void gen_movl_imm_T1(uint32_t val)
@@ -529,15 +535,6 @@
     gen_movl_TN_reg(reg, 1);
 }
 
-/* call this function before using T2 as it may have been set for a jump */
-static inline void flush_T2(DisasContext * dc)
-{
-    if (dc->npc == JUMP_PC) {
-        gen_op_generic_branch(dc->jump_pc[0], dc->jump_pc[1]);
-        dc->npc = DYNAMIC_PC;
-    }
-}
-
 static inline void gen_jmp_im(target_ulong pc)
 {
 #ifdef TARGET_SPARC64
@@ -564,10 +561,88 @@
 #endif
 }
 
+static inline void gen_branch2(DisasContext *dc, long tb, target_ulong pc1, target_ulong pc2)
+{
+    int l1;
+
+    l1 = gen_new_label();
+
+    gen_op_jz_T2_label(l1);
+
+    gen_op_goto_tb0(TBPARAM(tb));
+    gen_jmp_im(pc1);
+    gen_movl_npc_im(pc1 + 4);
+    gen_op_movl_T0_im((long)tb + 0);
+    gen_op_exit_tb();
+
+    gen_set_label(l1);
+    gen_op_goto_tb1(TBPARAM(tb));
+    gen_jmp_im(pc2);
+    gen_movl_npc_im(pc2 + 4);
+    gen_op_movl_T0_im((long)tb + 1);
+    gen_op_exit_tb();
+}
+
+static inline void gen_branch_a(DisasContext *dc, long tb, target_ulong pc1, target_ulong pc2)
+{
+    int l1;
+
+    l1 = gen_new_label();
+
+    gen_op_jz_T2_label(l1);
+
+    gen_op_goto_tb0(TBPARAM(tb));
+    gen_jmp_im(pc2);
+    gen_movl_npc_im(pc1);
+    gen_op_movl_T0_im((long)tb + 0);
+    gen_op_exit_tb();
+
+    gen_set_label(l1);
+    gen_op_goto_tb1(TBPARAM(tb));
+    gen_jmp_im(pc2 + 4);
+    gen_movl_npc_im(pc2 + 8);
+    gen_op_movl_T0_im((long)tb + 1);
+    gen_op_exit_tb();
+}
+
+static inline void gen_branch(DisasContext *dc, long tb, target_ulong pc, target_ulong npc)
+{
+    gen_op_goto_tb0(TBPARAM(tb));
+    gen_jmp_im(pc);
+    gen_movl_npc_im(npc);
+    gen_op_movl_T0_im((long)tb + 0);
+    gen_op_exit_tb();
+}
+
+static inline void gen_generic_branch(DisasContext *dc, target_ulong npc1, target_ulong npc2)
+{
+    int l1, l2;
+
+    l1 = gen_new_label();
+    l2 = gen_new_label();
+    gen_op_jz_T2_label(l1);
+
+    gen_movl_npc_im(npc1);
+    gen_op_jmp_label(l2);
+
+    gen_set_label(l1);
+    gen_movl_npc_im(npc2);
+    gen_set_label(l2);
+}
+
+/* call this function before using T2 as it may have been set for a jump */
+static inline void flush_T2(DisasContext * dc)
+{
+    if (dc->npc == JUMP_PC) {
+        gen_generic_branch(dc, dc->jump_pc[0], dc->jump_pc[1]);
+        dc->npc = DYNAMIC_PC;
+    }
+}
+
 static inline void save_npc(DisasContext * dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_op_generic_branch(dc->jump_pc[0], dc->jump_pc[1]);
+        gen_generic_branch(dc, dc->jump_pc[0], dc->jump_pc[1]);
         dc->npc = DYNAMIC_PC;
     } else if (dc->npc != DYNAMIC_PC) {
         gen_movl_npc_im(dc->npc);
@@ -583,7 +658,7 @@
 static inline void gen_mov_pc_npc(DisasContext * dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_op_generic_branch(dc->jump_pc[0], dc->jump_pc[1]);
+        gen_generic_branch(dc, dc->jump_pc[0], dc->jump_pc[1]);
         gen_op_mov_pc_npc();
         dc->pc = DYNAMIC_PC;
     } else if (dc->npc == DYNAMIC_PC) {
@@ -769,7 +844,7 @@
         flush_T2(dc);
         gen_cond[cc][cond]();
 	if (a) {
-	    gen_op_branch_a((long)dc->tb, target, dc->npc);
+	    gen_branch_a(dc, (long)dc->tb, target, dc->npc);
             dc->is_br = 1;
 	} else {
             dc->pc = dc->npc;
@@ -808,7 +883,7 @@
         flush_T2(dc);
         gen_fcond[cc][cond]();
 	if (a) {
-	    gen_op_branch_a((long)dc->tb, target, dc->npc);
+	    gen_branch_a(dc, (long)dc->tb, target, dc->npc);
             dc->is_br = 1;
 	} else {
             dc->pc = dc->npc;
@@ -829,7 +904,7 @@
     flush_T2(dc);
     gen_cond_reg(cond);
     if (a) {
-	gen_op_branch_a((long)dc->tb, target, dc->npc);
+	gen_branch_a(dc, (long)dc->tb, target, dc->npc);
 	dc->is_br = 1;
     } else {
 	dc->pc = dc->npc;
@@ -893,7 +968,7 @@
 		    target <<= 2;
 		    target = sign_extend(target, 16);
 		    rs1 = GET_FIELD(insn, 13, 17);
-		    gen_movl_T0_reg(rs1);
+		    gen_movl_reg_T0(rs1);
 		    do_branch_reg(dc, target, insn);
 		    goto jmp_insn;
 		}
@@ -952,7 +1027,15 @@
 	/*CALL*/ {
 	    target_long target = GET_FIELDs(insn, 2, 31) << 2;
 
+#ifdef TARGET_SPARC64
+	    if (dc->pc == (uint32_t)dc->pc) {
+		gen_op_movl_T0_im(dc->pc);
+	    } else {
+		gen_op_movq_T0_im64(dc->pc >> 32, dc->pc);
+	    }
+#else
 	    gen_op_movl_T0_im(dc->pc);
+#endif
 	    gen_movl_T0_reg(15);
 	    target += dc->pc;
             gen_mov_pc_npc(dc);
@@ -1039,6 +1122,25 @@
 		    gen_op_movl_T0_env(offsetof(CPUSPARCState, fprs));
                     gen_movl_T0_reg(rd);
                     break;
+		case 0x17: /* Tick compare */
+		    gen_op_movtl_T0_env(offsetof(CPUSPARCState, tick_cmpr));
+                    gen_movl_T0_reg(rd);
+                    break;
+		case 0x18: /* System tick */
+                    gen_op_rdtick(); // XXX
+                    gen_movl_T0_reg(rd);
+                    break;
+		case 0x19: /* System tick compare */
+		    gen_op_movtl_T0_env(offsetof(CPUSPARCState, stick_cmpr));
+                    gen_movl_T0_reg(rd);
+                    break;
+		case 0x10: /* Performance Control */
+		case 0x11: /* Performance Instrumentation Counter */
+		case 0x12: /* Dispatch Control */
+		case 0x13: /* Graphics Status */
+		case 0x14: /* Softint set, WO */
+		case 0x15: /* Softint clear, WO */
+		case 0x16: /* Softint write */
 #endif
                 default:
                     goto illegal_insn;
@@ -1549,6 +1651,50 @@
 		    gen_movl_T0_reg(rd);
 		}
 #endif
+#ifdef TARGET_SPARC64
+	    } else if (xop == 0x25) { /* sll, V9 sllx ( == sll) */
+                rs1 = GET_FIELD(insn, 13, 17);
+		gen_movl_reg_T0(rs1);
+		if (IS_IMM) {	/* immediate */
+                    rs2 = GET_FIELDs(insn, 20, 31);
+                    gen_movl_simm_T1(rs2);
+                } else {		/* register */
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    gen_movl_reg_T1(rs2);
+                }
+		gen_op_sll();
+		gen_movl_T0_reg(rd);
+	    } else if (xop == 0x26) { /* srl, V9 srlx */
+                rs1 = GET_FIELD(insn, 13, 17);
+		gen_movl_reg_T0(rs1);
+		if (IS_IMM) {	/* immediate */
+                    rs2 = GET_FIELDs(insn, 20, 31);
+                    gen_movl_simm_T1(rs2);
+                } else {		/* register */
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    gen_movl_reg_T1(rs2);
+                }
+		if (insn & (1 << 12))
+		    gen_op_srlx();
+		else
+		    gen_op_srl();
+		gen_movl_T0_reg(rd);
+	    } else if (xop == 0x27) { /* sra, V9 srax */
+                rs1 = GET_FIELD(insn, 13, 17);
+		gen_movl_reg_T0(rs1);
+		if (IS_IMM) {	/* immediate */
+                    rs2 = GET_FIELDs(insn, 20, 31);
+                    gen_movl_simm_T1(rs2);
+                } else {		/* register */
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    gen_movl_reg_T1(rs2);
+                }
+		if (insn & (1 << 12))
+		    gen_op_srax();
+		else
+		    gen_op_sra();
+		gen_movl_T0_reg(rd);
+#endif
 	    } else if (xop < 0x38) {
                 rs1 = GET_FIELD(insn, 13, 17);
 		gen_movl_reg_T0(rs1);
@@ -1660,32 +1806,20 @@
                         gen_op_mulscc_T1_T0();
                         gen_movl_T0_reg(rd);
                         break;
-                    case 0x25:	/* sll, V9 sllx ( == sll) */
+#ifndef TARGET_SPARC64
+                    case 0x25:	/* sll */
 			gen_op_sll();
                         gen_movl_T0_reg(rd);
                         break;
-                    case 0x26:  /* srl, V9 srlx */
-#ifdef TARGET_SPARC64
-			if (insn & (1 << 12))
-			    gen_op_srlx();
-			else
-			    gen_op_srl();
-#else
+                    case 0x26:  /* srl */
 			gen_op_srl();
-#endif
                         gen_movl_T0_reg(rd);
                         break;
-                    case 0x27:  /* sra, V9 srax */
-#ifdef TARGET_SPARC64
-			if (insn & (1 << 12))
-			    gen_op_srax();
-			else
-			    gen_op_sra();
-#else
+                    case 0x27:  /* sra */
 			gen_op_sra();
-#endif
                         gen_movl_T0_reg(rd);
                         break;
+#endif
                     case 0x30:
                         {
                             switch(rd) {
@@ -1709,7 +1843,28 @@
 				    gen_op_sir();
 #endif
 				break;
+			    case 0x17: /* Tick compare */
+#if !defined(CONFIG_USER_ONLY)
+				if (!supervisor(dc))
+				    goto illegal_insn;
 #endif
+				gen_op_movtl_env_T0(offsetof(CPUSPARCState, tick_cmpr));
+				break;
+			    case 0x18: /* System tick */
+#if !defined(CONFIG_USER_ONLY)
+				if (!supervisor(dc))
+				    goto illegal_insn;
+#endif
+				gen_op_movtl_env_T0(offsetof(CPUSPARCState, stick_cmpr));
+				break;
+			    case 0x19: /* System tick compare */
+#if !defined(CONFIG_USER_ONLY)
+				if (!supervisor(dc))
+				    goto illegal_insn;
+#endif
+				gen_op_movtl_env_T0(offsetof(CPUSPARCState, stick_cmpr));
+				break;
+
 			    case 0x10: /* Performance Control */
 			    case 0x11: /* Performance Instrumentation Counter */
 			    case 0x12: /* Dispatch Control */
@@ -1717,9 +1872,7 @@
 			    case 0x14: /* Softint set */
 			    case 0x15: /* Softint clear */
 			    case 0x16: /* Softint write */
-			    case 0x17: /* Tick compare */
-			    case 0x18: /* System tick */
-			    case 0x19: /* System tick compare */
+#endif
                             default:
                                 goto illegal_insn;
                             }
@@ -1770,7 +1923,7 @@
 				gen_op_wrtick();
 				break;
 			    case 5: // tba
-				gen_op_movl_env_T0(offsetof(CPUSPARCState, tbr));
+				gen_op_movtl_env_T0(offsetof(CPUSPARCState, tbr));
 				break;
 			    case 6: // pstate
 				gen_op_wrpstate();
@@ -1896,7 +2049,6 @@
 		}
 #ifdef TARGET_SPARC64
 	    } else if (xop == 0x39) { /* V9 return */
-		gen_op_restore();
                 rs1 = GET_FIELD(insn, 13, 17);
 		gen_movl_reg_T0(rs1);
                 if (IS_IMM) {	/* immediate */
@@ -1920,6 +2072,7 @@
 		    }
 #endif
                 }
+		gen_op_restore();
 		gen_mov_pc_npc(dc);
 		gen_op_movl_npc_T0();
 		dc->npc = DYNAMIC_PC;
@@ -1993,13 +2146,17 @@
 			case 0:
 			    if (!supervisor(dc))
 				goto priv_insn;
+			    dc->npc = DYNAMIC_PC;
+			    dc->pc = DYNAMIC_PC;
 			    gen_op_done();
-			    break;
+			    goto jmp_insn;
 			case 1:
 			    if (!supervisor(dc))
 				goto priv_insn;
+			    dc->npc = DYNAMIC_PC;
+			    dc->pc = DYNAMIC_PC;
 			    gen_op_retry();
-			    break;
+			    goto jmp_insn;
 			default:
 			    goto illegal_insn;
 			}
@@ -2317,7 +2474,7 @@
 	gen_op_next_insn();
     } else if (dc->npc == JUMP_PC) {
         /* we can do a static jump */
-        gen_op_branch2((long)dc->tb, dc->jump_pc[0], dc->jump_pc[1]);
+        gen_branch2(dc, (long)dc->tb, dc->jump_pc[0], dc->jump_pc[1]);
         dc->is_br = 1;
     } else {
 	dc->pc = dc->npc;
@@ -2365,6 +2522,7 @@
     gen_opc_ptr = gen_opc_buf;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
+    nb_gen_labels = 0;
 
     do {
         if (env->nb_breakpoints > 0) {
@@ -2421,7 +2579,7 @@
         if (dc->pc != DYNAMIC_PC && 
             (dc->npc != DYNAMIC_PC && dc->npc != JUMP_PC)) {
             /* static PC and NPC: we can use direct chaining */
-            gen_op_branch((long)tb, dc->pc, dc->npc);
+            gen_branch(dc, (long)tb, dc->pc, dc->npc);
         } else {
             if (dc->pc != DYNAMIC_PC)
                 gen_jmp_im(dc->pc);
@@ -2487,15 +2645,16 @@
 #else
     env->psrs = 1;
     env->psrps = 1;
-    env->pc = 0xffd00000;
     env->gregs[1] = ram_size;
-    env->npc = env->pc + 4;
 #ifdef TARGET_SPARC64
-    env->pstate = PS_AM | PS_PRIV; // XXX: Force AM
+    env->pstate = PS_PRIV;
     env->version = GET_VER(env);
+    env->pc = 0x1fff0000000ULL;
 #else
     env->mmuregs[0] = (0x04 << 24); /* Impl 0, ver 4, MMU disabled */
+    env->pc = 0xffd00000;
 #endif
+    env->npc = env->pc + 4;
 #endif
 }
 
diff --git a/vl.h b/vl.h
index 85d0cf4..f9fe6a3 100644
--- a/vl.h
+++ b/vl.h
@@ -559,6 +559,7 @@
 PCIBus *pci_prep_init(void);
 PCIBus *pci_grackle_init(uint32_t base);
 PCIBus *pci_pmac_init(void);
+PCIBus *pci_apb_init(target_ulong special_base, target_ulong mem_base);
 
 /* openpic.c */
 typedef struct openpic_t openpic_t;