Rely less on initialized data.

(1) Don't statically initialize HWRPB and PCBB.
(2) Use SwpPal at the end of do_start to install the PCBB and PTBR,
    rather than setting those up in __start in PALmode.
(3) Use -mbuild-constants to prevent the compiler from using static data.
diff --git a/Makefile b/Makefile
index 5514e43..45dcfb7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,9 @@
 CROSS = alphaev67-linux-
 CC = $(CROSS)gcc
 LD = $(CROSS)ld
-CFLAGS = -O -g -msmall-text -msmall-data -fvisibility=hidden \
-	-mno-fp-regs -fno-strict-aliasing
+
+CFLAGS = -O -g -fvisibility=hidden -fno-strict-aliasing \
+  -msmall-text -msmall-data -mno-fp-regs -mbuild-constants
 
 OBJS = pal.o init.o uart.o memset.o printf.o
 
diff --git a/init.c b/init.c
index f0a0483..25ca743 100644
--- a/init.c
+++ b/init.c
@@ -8,80 +8,114 @@
 #define PAGE_SIZE	(1ul << PAGE_SHIFT)
 #define PAGE_OFFSET	0xfffffc0000000000UL
 
-#if 1
-#define PA(VA)		((unsigned long)(unsigned int)(VA))
-#else
-#define PA(VA)		((VA) - PAGE_OFFSET)
-#endif
-#define VA(PA)		((PA) + PAGE_OFFSET)
+#define VPTPTR		0xfffffffe00000000UL
+
+#define PA(VA)		((unsigned long)(VA) & 0xfffffffffful)
+#define VA(PA)		((void *)(PA) + PAGE_OFFSET)
 
 #define HZ	1024
 
-struct pcb_struct pal_pcb __attribute__((section(".sbss")));
-
 struct hwrpb_combine {
   struct hwrpb_struct hwrpb;
   struct percpu_struct processor;
   struct memdesc_struct md;
   struct memclust_struct mc[2];
-} hwrpb __attribute__((section(".data.hwrpb"))) = {
-  /* This is HWRPB\0\0\0.  */
-  .hwrpb.id = 0x4857525042000000,
-
-  .hwrpb.size = sizeof(struct hwrpb_struct),
-  .hwrpb.pagesize = PAGE_SIZE,
-  .hwrpb.ssn = "MILO QEMU",
-
-  /* ??? This should match TARGET_PHYS_ADDR_SPACE_BITS from qemu.  */
-  .hwrpb.pa_bits = 44,
-
-  /* ??? Should we be truethful and say 1 asn, or simply pretend we
-     have ASNs but ignore them?  */
-  .hwrpb.max_asn = 127,
-
-  /* For now, hard-code emulation of sx164.  */
-  .hwrpb.cpuid = PCA56_CPU,
-  .processor.type = PCA56_CPU,
-  .hwrpb.sys_type = ST_DEC_EB164,
-  .hwrpb.sys_variation = 15 << 10,
-  .hwrpb.sys_revision = 0,
-
-  .hwrpb.intr_freq = HZ * 4096,
-
-  /* ??? What the hell should we put here.  Measure like the kernel will?  */
-  .hwrpb.cycle_freq = 400000000,
-
-  .hwrpb.vptb = 0x200000000UL,
-  .hwrpb.nr_processors = 1,
-  .hwrpb.processor_size = sizeof(struct percpu_struct),
-  .hwrpb.processor_offset = offsetof(struct hwrpb_combine, processor),
-
-  .hwrpb.mddt_offset = offsetof(struct hwrpb_combine, md),
-  .md.numclusters = 2,
-  .mc[0].usage = 2
 };
 
-unsigned long page_dir[1024] __attribute__((aligned(PAGE_SIZE)));
+extern char stack[PAGE_SIZE] __attribute__((section(".sbss")));
+extern char _end[] __attribute__((visibility("hidden"), nocommon));
 
-extern char _end[];
-static unsigned long last_alloc = (unsigned long)_end;
+struct pcb_struct pcb __attribute__((section(".sbss")));
+
+static unsigned long page_dir[1024] __attribute__((aligned(PAGE_SIZE)));
+
+/* The HWRPB must be aligned because it is exported at INIT_HWRPB.  */
+struct hwrpb_combine hwrpb __attribute__((aligned(PAGE_SIZE)));
+
+static void *last_alloc;
 
 static void *
 alloc (unsigned long size, unsigned long align)
 {
-  unsigned long p = (last_alloc + align - 1) & ~(align - 1);
+  void *p = (void *)(((unsigned long)last_alloc + align - 1) & ~(align - 1));
   last_alloc = p + size;
-  return memset ((void *)p, 0, size);
+  return memset (p, 0, size);
 }
 
-static unsigned long
+static inline unsigned long
+pt_index(unsigned long addr, int level)
+{
+  return (addr >> (PAGE_SHIFT + (10 * level))) & 0x3ff;
+}
+
+static inline unsigned long
+build_pte (void *page)
+{
+  unsigned long bits;
+
+  bits = PA((unsigned long)page) << (32 - PAGE_SHIFT);
+  bits += _PAGE_VALID | _PAGE_KRE | _PAGE_KWE;
+
+  return bits;
+}
+
+static inline void *
+pte_page (unsigned long pte)
+{
+  return VA(pte >> 32 << PAGE_SHIFT);
+}
+
+static void
+set_pte (unsigned long addr, void *page)
+{
+  unsigned long *pt = page_dir;
+  unsigned long index;
+
+  index = pt_index(addr, 2);
+  if (pt[index] != 0)
+    pt = pte_page (pt[index]);
+  else
+    {
+      unsigned long *npt = alloc(PAGE_SIZE, PAGE_SIZE);
+      pt[index] = build_pte (npt);
+      pt = npt;
+    }
+
+  index = pt_index(addr, 1);
+  if (pt[index] != 0)
+    pt = pte_page (pt[index]);
+  else
+    {
+      unsigned long *npt = alloc(PAGE_SIZE, PAGE_SIZE);
+      pt[index] = build_pte (npt);
+      pt = npt;
+    }
+
+  index = pt_index(addr, 0);
+  pt[index] = build_pte (page);
+}
+
+static void
+init_page_table(void)
+{
+  /* Install the self-reference for the virtual page table base register.  */
+  page_dir[pt_index(VPTPTR, 2)] = build_pte(page_dir);
+
+  set_pte ((unsigned long)INIT_HWRPB, &hwrpb);
+  
+  /* ??? SRM maps some amount of memory at 0x20000000 for use by programs
+     started from the console prompt, including the bootloader.  While
+     we're emulating MILO, don't bother, as we jump straight to the kernel
+     loaded into KSEG.  */
+}
+
+static inline unsigned long
 init_cpuid (void)
 {
   unsigned long implver, amask;
 
-  __asm ("implver %0" : "=r"(implver));
-  __asm ("amask %1,%0" : "=r"(amask) : "r"(-1));
-  amask = ~amask;
+  implver = __builtin_alpha_implver();
+  amask = ~__builtin_alpha_amask(-1);
 
   switch (implver)
     {
@@ -104,113 +138,109 @@
 }
 
 static void
-hwrpb_update_checksum (void)
-{
-  unsigned long sum = 0, *l;
-  for (l = (unsigned long *) &hwrpb.hwrpb; l < &hwrpb.hwrpb.chksum; ++l)
-    sum += *l;
-  hwrpb.hwrpb.chksum = sum;
-}
-
-static void
 init_hwrpb (unsigned long memsize)
 {
   unsigned long pal_pages;
+  
+  hwrpb.hwrpb.phys_addr = PA(&hwrpb);
 
-  hwrpb.hwrpb.phys_addr = PA((unsigned long)&hwrpb);
+  /* Yes, the 'HWRPB' magic is in big-endian byte ordering.  */
+  hwrpb.hwrpb.id = ( (long)'H' << 56
+		   | (long)'W' << 48
+		   | (long)'R' << 40
+		   | (long)'P' << 32
+		   | (long)'B' << 24);
+
+  hwrpb.hwrpb.size = sizeof(struct hwrpb_struct);
+
+  /* The inclusion of MILO here tells the Linux kernel that we do
+     not (yet) support any of the extended console support routines
+     that are in SRM.  */
+  ((int *)hwrpb.hwrpb.ssn)[0] = ( 'M' << 0
+				| 'I' << 8
+				| 'L' << 16
+				| 'O' << 24);
+  ((int *)hwrpb.hwrpb.ssn)[1] = ( ' ' << 0
+				| 'Q' << 8
+				| 'E' << 16
+				| 'M' << 24);
+  ((int *)hwrpb.hwrpb.ssn)[2] = ( 'U' << 0);
+
+  /* For now, hard-code emulation of sx164.  */
+  hwrpb.hwrpb.cpuid = PCA56_CPU;
+  hwrpb.hwrpb.pagesize = PAGE_SIZE;
+  hwrpb.hwrpb.pa_bits = 40;
+  hwrpb.hwrpb.max_asn = 127;
+  hwrpb.hwrpb.sys_type = ST_DEC_EB164;
+  hwrpb.hwrpb.sys_variation = 15 << 10;
+  hwrpb.hwrpb.sys_revision = 0;
+  hwrpb.processor.type = PCA56_CPU;
+
+  hwrpb.hwrpb.intr_freq = HZ * 4096;
+
+  /* ??? What the hell should we put here.  Measure like the kernel will?  */
+  hwrpb.hwrpb.cycle_freq = 400000000;
+
+  hwrpb.hwrpb.vptb = VPTPTR;
+
+  hwrpb.hwrpb.nr_processors = 1;
+  hwrpb.hwrpb.processor_size = sizeof(struct percpu_struct);
+  hwrpb.hwrpb.processor_offset = offsetof(struct hwrpb_combine, processor);
+
+  hwrpb.hwrpb.mddt_offset = offsetof(struct hwrpb_combine, md);
+  hwrpb.md.numclusters = 2;
 
   pal_pages = (PA(last_alloc) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
   hwrpb.mc[0].numpages = pal_pages;
+  hwrpb.mc[0].usage = 1;
   hwrpb.mc[1].start_pfn = pal_pages;
   hwrpb.mc[1].numpages = (memsize >> PAGE_SHIFT) - pal_pages;
 
-  hwrpb_update_checksum ();
+  {
+    unsigned long sum = 0, *l;
+    for (l = (unsigned long *) &hwrpb.hwrpb; l < &hwrpb.hwrpb.chksum; ++l)
+      sum += *l;
+    hwrpb.hwrpb.chksum = sum;
+  }
 }
 
 static void
 init_pcb (void)
 {
-  pal_pcb.ptbr = PA((unsigned long)page_dir);
-  pal_pcb.flags = 1;
+  pcb.ksp = (unsigned long)stack + sizeof(stack);
+  pcb.ptbr = PA(page_dir);
+  pcb.flags = 1; /* FEN */
 }
 
-static inline unsigned long
-build_pte (void *page)
+void
+do_hello(void)
 {
-  unsigned long bits;
-
-  bits = PA((unsigned long)page) << (32 - PAGE_SHIFT);
-  bits += _PAGE_VALID | _PAGE_KRE | _PAGE_KWE;
-
-  return bits;
-}
-
-static inline void *
-pte_page (unsigned long pte)
-{
-  return (void *)VA(pte >> 32 << PAGE_SHIFT);
-}
-
-static void
-set_pte (unsigned long addr, void *page)
-{
-  unsigned long *pt = page_dir;
-  unsigned long index;
-
-  index = (addr >> (PAGE_SHIFT+20)) % 1024;
-  if (pt[index] != 0)
-    pt = pte_page (pt[index]);
-  else
-    {
-      unsigned long *npt = alloc(PAGE_SIZE, PAGE_SIZE);
-      pt[index] = build_pte (npt);
-      pt = npt;
-    }
-
-  index = (addr >> (PAGE_SHIFT+10)) % 1024;
-  if (pt[index] != 0)
-    pt = pte_page (pt[index]);
-  else
-    {
-      unsigned long *npt = alloc(PAGE_SIZE, PAGE_SIZE);
-      pt[index] = build_pte (npt);
-      pt = npt;
-    }
-
-  index = (addr >> PAGE_SHIFT) % 1024;
-  pt[index] = build_pte (page);
-}
-
-static void
-init_page_table (unsigned long memsize)
-{
-  unsigned long i, addr, max_addr, page;
-
-  set_pte ((unsigned long)INIT_HWRPB, &hwrpb);
-  
-  /* SRM places the self-map for the VPTBR in the second entry.  */
-  /* MILO places the self-map for the VPTBR in the last entry.  */
-  page_dir[1023] = build_pte (page_dir);
-
-  /* Write the SRM vptptr.  */
-  {
-    register unsigned long a0 __asm__("$16") = 0xfffffffe00000000UL;
-    __asm ("call_pal 0x2d" : : "r"(a0));
-  }
+  uart_puts(COM1, "Hello, World!\n");
+  asm ("halt");
+  __builtin_unreachable ();
 }
 
 void
 do_start(unsigned long memsize, void (*kernel_entry)(void))
 {
-  init_page_table (memsize);
-  init_hwrpb (memsize);
-  init_pcb ();
+  last_alloc = _end;
 
-  uart_init ();
-  uart_puts (COM1, "Hello, World!\n");
+  init_page_table();
+  init_hwrpb(memsize);
+  init_pcb();
+  uart_init();
 
-  if (kernel_entry)
-    kernel_entry();
-  asm ("halt");
+  {
+    register int variant __asm__("$16") = 2;	/* OSF/1 PALcode */
+    register void (*pc)(void) __asm__("$17");
+    register unsigned long pa_pcb __asm__("$18");
+    register unsigned long vptptr __asm__("$19");
+
+    pc = (kernel_entry ? kernel_entry : do_hello);
+    pa_pcb = PA(&pcb);
+    vptptr = VPTPTR;
+    asm("call_pal 0x0a" : : "r"(variant), "r"(pc), "r"(pa_pcb), "r"(vptptr));
+  }
+  __builtin_unreachable ();
 }
diff --git a/pal.S b/pal.S
index f62e623..913b8f8 100644
--- a/pal.S
+++ b/pal.S
@@ -182,7 +182,7 @@
 __start:
 	// Initialize GP and stack.
 	br	$gp, .+4
-	ldah	$gp, 0($gp)		!gpdisp!1
+	ldah	$gp, 0($gp)			!gpdisp!1
 	lda	$gp, 0($gp)			!gpdisp!1
 	mtpr	$gp, ptPgp
 
@@ -192,15 +192,6 @@
 	lda	t0, IPL_K_HIGH
 	mtpr	t0, qemu_ps
 
-	// Load the initial PCB and page table elements.
-	lda	t0, page_dir($gp)		!gprel
-	zap	t0, 0xf0, t0
-	mtpr	t0, qemu_ptbr
-
-	lda	t0, pal_pcb($gp)		!gprel
-	zap	t0, 0xf0, t0
-	mtpr	t0, ptPcbb
-
 	// Make sure kernel entry points are invalid.
 	lda	t0, -1
 	mtpr	t0, ptEntUna
@@ -538,6 +529,12 @@
  *			0 - Success (PALcode was switched)
  *			1 - Unknown PALcode variant
  *			2 - Known PALcode variant, but PALcode not loaded
+ *
+ *	r26 (ra) = r27 (pv) = New PC
+ *		Note that this is non-architected, but is relied on by
+ *		the usage of SwpPal within our own console code in order
+ *		to simplify its use within C code.
+ *
  */
 	ORG_CALL_PAL_PRIV(0x0A)
 CallPal_SwpPal:
@@ -556,8 +553,32 @@
 
 	.text	1
 CallPal_SwpPal_Cont:
-	// YOUAREHERE
-	halt
+	rpcc	p0
+	mtpr	a2, ptPcbb
+	mtpr	a3, qemu_vptptr
+
+	ldq_p	$sp, PCB_Q_KSP(a2)
+	ldq_p	t0, PCB_Q_USP(a2)
+	ldq_p	t1, PCB_Q_PTBR(a2)
+	ldl_p	t2, PCB_L_PCC(a2)
+	ldq_p	t3, PCB_Q_UNIQUE(a2)
+	ldq_p	t4, PCB_Q_FEN(a2)
+
+	mtpr	t0, ptUsp
+	mtpr	t1, qemu_ptbr
+	mtpr	t3, qemu_unique
+
+	subl	t2, p0, t2
+	mtpr	t2, qemu_pcc_ofs
+
+	and	t4, 1, t4
+	mtpr	t4, qemu_fen
+
+	mtpr	$31, qemu_tbia		// Flush TLB for new PTBR
+
+	mov	a1, $26
+	mov	a1, $27
+	hw_ret	(a1)
 ENDFN	CallPal_SwpPal_Cont
 	.previous
 
@@ -2050,6 +2071,7 @@
 	.size	laf_base, . - laf_base
 
 	.align 3
+	.globl	stack
 	.type	stack,@object
 	.size	stack,STACK_SIZE
 stack:	.skip	STACK_SIZE
diff --git a/palcode.ld b/palcode.ld
index a1304b5..59efb04 100644
--- a/palcode.ld
+++ b/palcode.ld
@@ -5,7 +5,7 @@
 {
   . = 0xfffffc0000000000;
   .text : { *(.text*) }
-  .rodata : { *(.rodata) }
+  .rodata : { *(.rodata*) }
   .data ALIGN(8192) : { *(.data.hwrpb) *(.data*) }
   .got : { *(.got.plt) *(.got) }
   .sdata : { *(.sdata*) }