Merge tag 'pull-vfio-20230630' of https://github.com/legoater/qemu into staging

vfio queue:

* migration: New switchover ack to reduce downtime
* VFIO migration pre-copy support
* Removal of the VFIO migration experimental flag
* Alternate offset for GPUDirect Cliques
* Misc fixes

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmSeVHYACgkQUaNDx8/7
# 7KHeZw/+LRe9QQpx8hU//vKBvLet2QvI3WUaXGHiHbblbRT6HhiHjWHB2/8j6jji
# QhAGJ6w9yoKODyY0kGpVFEnkmXOKyqwWssBheV219ntZs09pFGxZr/ldUhT22aBN
# kH8mHU9BZ3J+zF/kKphpcIC1sPxVu/DlrtnJu5vDGuRAOu8+3kFV217JC1yGs1Vh
# n+KOho8a8oP9qxtzfvQ9iZ4dpBOOKpE9vscS12wJAlen93AGB6esR7VaLxDjExRP
# yL1pguQ8ZZ1gEXXbXO62djKo3IViobtD08KmCXTzQ6TVquLleJzqgjp+A0THnYAe
# J9Rlja7LpsO9MYSxmRE9WcQccC+sAGn/t/ufB0tL8zR43FvfhbF5H0PzBBY0H7YA
# JlzN+fgrKEEHJwMhXANNvSddhWCwvrkjNxo/80u3ySYMQR1Hav/tsXYBlk16e5nS
# fmtrFGTwhsVdy1Q6ZqEOyTni1eiYt5stEQMZFODdUNj6b9FugSZ0BK+2WN/M0CzU
# 6mKmJQgZAG/nBoRJm/XCO5OKQ6wm/4tm6F4HSH5EJ6mDT+DqETAk4GRUWTbYa2/G
# yAAOlhTMu8Xc/NhMeJ7Z99dyq0SM8pi/XpVEIv7p9yBak8ix60iCWZtDE8vlDv3M
# UfMVMTAvTS30kbS6FDN2Yyl6l8/ETdcwVIN4l02ipGzpMCtn9EQ=
# =dKUj
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 30 Jun 2023 06:05:10 AM CEST
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@kaod.org>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B  0B60 51A3 43C7 CFFB ECA1

* tag 'pull-vfio-20230630' of https://github.com/legoater/qemu:
  vfio/pci: Free leaked timer in vfio_realize error path
  vfio/pci: Fix a segfault in vfio_realize
  MAINTAINERS: Promote Cédric to VFIO co-maintainer
  vfio/migration: Make VFIO migration non-experimental
  vfio/migration: Reset bytes_transferred properly
  vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path
  hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques
  vfio: Implement a common device info helper
  vfio/migration: Add support for switchover ack capability
  vfio/migration: Add VFIO migration pre-copy support
  vfio/migration: Store VFIO migration flags in VFIOMigration
  vfio/migration: Refactor vfio_save_block() to return saved data size
  tests: Add migration switchover ack capability test
  migration: Enable switchover ack capability
  migration: Implement switchover ack logic
  migration: Add switchover ack capability

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml
index 1e0e6c7..b6ec99e 100644
--- a/.gitlab-ci.d/crossbuilds.yml
+++ b/.gitlab-ci.d/crossbuilds.yml
@@ -57,7 +57,7 @@
   variables:
     IMAGE: fedora-i386-cross
     ACCEL: tcg-interpreter
-    EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user
+    EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user --disable-plugins
     MAKE_CHECK_ARGS: check check-tcg
 
 cross-mipsel-system:
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index c9a5a21..f0d9ef6 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differ
diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index 2e8cc01..acfcd1e 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -55,7 +55,7 @@
 	    $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak
 -include config-cc.mak
 
-LDFLAGS += -Wl,-pie -nostdlib
+LDFLAGS += -Wl,-pie -nostdlib -z noexecstack
 
 build-all: s390-ccw.img s390-netboot.img
 
diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h
index 88a88ad..8b18153 100644
--- a/pc-bios/s390-ccw/cio.h
+++ b/pc-bios/s390-ccw/cio.h
@@ -17,32 +17,32 @@
  * path management control word
  */
 struct pmcw {
-    __u32 intparm;      /* interruption parameter */
-    __u32 qf:1;         /* qdio facility */
-    __u32 w:1;
-    __u32 isc:3;        /* interruption subclass */
-    __u32 res5:3;       /* reserved zeros */
-    __u32 ena:1;        /* enabled */
-    __u32 lm:2;         /* limit mode */
-    __u32 mme:2;        /* measurement-mode enable */
-    __u32 mp:1;         /* multipath mode */
-    __u32 tf:1;         /* timing facility */
-    __u32 dnv:1;        /* device number valid */
-    __u32 dev:16;       /* device number */
-    __u8  lpm;          /* logical path mask */
-    __u8  pnom;         /* path not operational mask */
-    __u8  lpum;         /* last path used mask */
-    __u8  pim;          /* path installed mask */
-    __u16 mbi;          /* measurement-block index */
-    __u8  pom;          /* path operational mask */
-    __u8  pam;          /* path available mask */
-    __u8  chpid[8];     /* CHPID 0-7 (if available) */
-    __u32 unused1:8;    /* reserved zeros */
-    __u32 st:3;         /* subchannel type */
-    __u32 unused2:18;   /* reserved zeros */
-    __u32 mbfc:1;       /* measurement block format control */
-    __u32 xmwme:1;      /* extended measurement word mode enable */
-    __u32 csense:1;     /* concurrent sense; can be enabled ...*/
+    u32 intparm;        /* interruption parameter */
+    u32 qf:1;           /* qdio facility */
+    u32 w:1;
+    u32 isc:3;          /* interruption subclass */
+    u32 res5:3;         /* reserved zeros */
+    u32 ena:1;          /* enabled */
+    u32 lm:2;           /* limit mode */
+    u32 mme:2;          /* measurement-mode enable */
+    u32 mp:1;           /* multipath mode */
+    u32 tf:1;           /* timing facility */
+    u32 dnv:1;          /* device number valid */
+    u32 dev:16;         /* device number */
+    u8  lpm;            /* logical path mask */
+    u8  pnom;           /* path not operational mask */
+    u8  lpum;           /* last path used mask */
+    u8  pim;            /* path installed mask */
+    u16 mbi;            /* measurement-block index */
+    u8  pom;            /* path operational mask */
+    u8  pam;            /* path available mask */
+    u8  chpid[8];       /* CHPID 0-7 (if available) */
+    u32 unused1:8;      /* reserved zeros */
+    u32 st:3;           /* subchannel type */
+    u32 unused2:18;     /* reserved zeros */
+    u32 mbfc:1;         /* measurement block format control */
+    u32 xmwme:1;        /* extended measurement word mode enable */
+    u32 csense:1;       /* concurrent sense; can be enabled ...*/
                         /*  ... per MSCH, however, if facility */
                         /*  ... is not installed, this results */
                         /*  ... in an operand exception.       */
@@ -50,24 +50,24 @@
 
 /* Target SCHIB configuration. */
 struct schib_config {
-    __u64 mba;
-    __u32 intparm;
-    __u16 mbi;
-    __u32 isc:3;
-    __u32 ena:1;
-    __u32 mme:2;
-    __u32 mp:1;
-    __u32 csense:1;
-    __u32 mbfc:1;
+    u64 mba;
+    u32 intparm;
+    u16 mbi;
+    u32 isc:3;
+    u32 ena:1;
+    u32 mme:2;
+    u32 mp:1;
+    u32 csense:1;
+    u32 mbfc:1;
 } __attribute__ ((packed));
 
 struct scsw {
-    __u16 flags;
-    __u16 ctrl;
-    __u32 cpa;
-    __u8 dstat;
-    __u8 cstat;
-    __u16 count;
+    u16 flags;
+    u16 ctrl;
+    u32 cpa;
+    u8 dstat;
+    u8 cstat;
+    u16 count;
 } __attribute__ ((packed));
 
 /* Function Control */
@@ -117,42 +117,42 @@
 typedef struct schib {
     struct pmcw pmcw;     /* path management control word */
     struct scsw scsw;     /* subchannel status word */
-    __u64 mba;            /* measurement block address */
-    __u8 mda[4];          /* model dependent area */
+    u64 mba;              /* measurement block address */
+    u8 mda[4];            /* model dependent area */
 } __attribute__ ((packed, aligned(4))) Schib;
 
 typedef struct subchannel_id {
     union {
         struct {
-            __u16 cssid:8;
-            __u16 reserved:4;
-            __u16 m:1;
-            __u16 ssid:2;
-            __u16 one:1;
+            u16 cssid:8;
+            u16 reserved:4;
+            u16 m:1;
+            u16 ssid:2;
+            u16 one:1;
         };
-        __u16 sch_id;
+        u16 sch_id;
     };
-    __u16 sch_no;
+    u16 sch_no;
 } __attribute__ ((packed, aligned(4))) SubChannelId;
 
 struct chsc_header {
-    __u16 length;
-    __u16 code;
+    u16 length;
+    u16 code;
 } __attribute__((packed));
 
 typedef struct chsc_area_sda {
     struct chsc_header request;
-    __u8 reserved1:4;
-    __u8 format:4;
-    __u8 reserved2;
-    __u16 operation_code;
-    __u32 reserved3;
-    __u32 reserved4;
-    __u32 operation_data_area[252];
+    u8 reserved1:4;
+    u8 format:4;
+    u8 reserved2;
+    u16 operation_code;
+    u32 reserved3;
+    u32 reserved4;
+    u32 operation_data_area[252];
     struct chsc_header response;
-    __u32 reserved5:4;
-    __u32 format2:4;
-    __u32 reserved6:24;
+    u32 reserved5:4;
+    u32 format2:4;
+    u32 reserved6:24;
 } __attribute__((packed)) ChscAreaSda;
 
 /*
@@ -160,37 +160,37 @@
  */
 struct tpi_info {
     struct subchannel_id schid;
-    __u32 intparm;      /* interruption parameter */
-    __u32 adapter_IO:1;
-    __u32 reserved2:1;
-    __u32 isc:3;
-    __u32 reserved3:12;
-    __u32 int_type:3;
-    __u32 reserved4:12;
+    u32 intparm;      /* interruption parameter */
+    u32 adapter_IO:1;
+    u32 reserved2:1;
+    u32 isc:3;
+    u32 reserved3:12;
+    u32 int_type:3;
+    u32 reserved4:12;
 } __attribute__ ((packed, aligned(4)));
 
 /* channel command word (format 0) */
 typedef struct ccw0 {
-    __u8 cmd_code;
-    __u32 cda:24;
-    __u32 chainData:1;
-    __u32 chain:1;
-    __u32 sli:1;
-    __u32 skip:1;
-    __u32 pci:1;
-    __u32 ida:1;
-    __u32 suspend:1;
-    __u32 mida:1;
-    __u8 reserved;
-    __u16 count;
+    u8 cmd_code;
+    u32 cda:24;
+    u32 chainData:1;
+    u32 chain:1;
+    u32 sli:1;
+    u32 skip:1;
+    u32 pci:1;
+    u32 ida:1;
+    u32 suspend:1;
+    u32 mida:1;
+    u8 reserved;
+    u16 count;
 } __attribute__ ((packed, aligned(8))) Ccw0;
 
 /* channel command word (format 1) */
 typedef struct ccw1 {
-    __u8 cmd_code;
-    __u8 flags;
-    __u16 count;
-    __u32 cda;
+    u8 cmd_code;
+    u8 flags;
+    u16 count;
+    u32 cda;
 } __attribute__ ((packed, aligned(8))) Ccw1;
 
 /* do_cio() CCW formats */
@@ -234,31 +234,31 @@
  * Command-mode operation request block
  */
 typedef struct cmd_orb {
-    __u32 intparm;    /* interruption parameter */
-    __u32 key:4;      /* flags, like key, suspend control, etc. */
-    __u32 spnd:1;     /* suspend control */
-    __u32 res1:1;     /* reserved */
-    __u32 mod:1;      /* modification control */
-    __u32 sync:1;     /* synchronize control */
-    __u32 fmt:1;      /* format control */
-    __u32 pfch:1;     /* prefetch control */
-    __u32 isic:1;     /* initial-status-interruption control */
-    __u32 alcc:1;     /* address-limit-checking control */
-    __u32 ssic:1;     /* suppress-suspended-interr. control */
-    __u32 res2:1;     /* reserved */
-    __u32 c64:1;      /* IDAW/QDIO 64 bit control  */
-    __u32 i2k:1;      /* IDAW 2/4kB block size control */
-    __u32 lpm:8;      /* logical path mask */
-    __u32 ils:1;      /* incorrect length */
-    __u32 zero:6;     /* reserved zeros */
-    __u32 orbx:1;     /* ORB extension control */
-    __u32 cpa;    /* channel program address */
+    u32 intparm;    /* interruption parameter */
+    u32 key:4;      /* flags, like key, suspend control, etc. */
+    u32 spnd:1;     /* suspend control */
+    u32 res1:1;     /* reserved */
+    u32 mod:1;      /* modification control */
+    u32 sync:1;     /* synchronize control */
+    u32 fmt:1;      /* format control */
+    u32 pfch:1;     /* prefetch control */
+    u32 isic:1;     /* initial-status-interruption control */
+    u32 alcc:1;     /* address-limit-checking control */
+    u32 ssic:1;     /* suppress-suspended-interr. control */
+    u32 res2:1;     /* reserved */
+    u32 c64:1;      /* IDAW/QDIO 64 bit control  */
+    u32 i2k:1;      /* IDAW 2/4kB block size control */
+    u32 lpm:8;      /* logical path mask */
+    u32 ils:1;      /* incorrect length */
+    u32 zero:6;     /* reserved zeros */
+    u32 orbx:1;     /* ORB extension control */
+    u32 cpa;        /* channel program address */
 }  __attribute__ ((packed, aligned(4))) CmdOrb;
 
 struct ciw {
-    __u8 type;
-    __u8 command;
-    __u16 count;
+    u8 type;
+    u8 command;
+    u16 count;
 };
 
 #define CU_TYPE_UNKNOWN         0x0000
@@ -271,12 +271,12 @@
  */
 typedef struct senseid {
     /* common part */
-    __u8  reserved;   /* always 0x'FF' */
-    __u16 cu_type;    /* control unit type */
-    __u8  cu_model;   /* control unit model */
-    __u16 dev_type;   /* device type */
-    __u8  dev_model;  /* device model */
-    __u8  unused;     /* padding byte */
+    u8  reserved;   /* always 0x'FF' */
+    u16 cu_type;    /* control unit type */
+    u8  cu_model;   /* control unit model */
+    u16 dev_type;   /* device type */
+    u8  dev_model;  /* device model */
+    u8  unused;     /* padding byte */
     /* extended part */
     struct ciw ciw[62];
 }  __attribute__ ((packed, aligned(4))) SenseId;
@@ -342,9 +342,9 @@
 /* interruption response block */
 typedef struct irb {
     struct scsw scsw;
-    __u32 esw[5];
-    __u32 ecw[8];
-    __u32 emw[8];
+    u32 esw[5];
+    u32 ecw[8];
+    u32 emw[8];
 }  __attribute__ ((packed, aligned(4))) Irb;
 
 /* Used for SEEK ccw commands */
diff --git a/pc-bios/s390-ccw/helper.h b/pc-bios/s390-ccw/helper.h
index 3d0731c..8e3dfcb 100644
--- a/pc-bios/s390-ccw/helper.h
+++ b/pc-bios/s390-ccw/helper.h
@@ -38,7 +38,7 @@
 
 static inline void sleep(unsigned int seconds)
 {
-    ulong target = get_time_seconds() + seconds;
+    unsigned long target = get_time_seconds() + seconds;
 
     while (get_time_seconds() < target) {
         yield();
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index a2def83..5506798 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -17,7 +17,6 @@
 #include "virtio-scsi.h"
 #include "dasd-ipl.h"
 
-char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 static SubChannelId blk_schid = { .one = 1 };
 static char loadparm_str[LOADPARM_LEN + 1];
 QemuIplParameters qipl;
diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
index 056e93a..5cd619b 100644
--- a/pc-bios/s390-ccw/netmain.c
+++ b/pc-bios/s390-ccw/netmain.c
@@ -50,7 +50,6 @@
 /* STSI 3.2.2 offset of first vmdb + offset of uuid inside vmdb */
 #define STSI322_VMDB_UUID_OFFSET ((8 + 12) * 4)
 
-char stack[PAGE_SIZE * 8] __attribute__((aligned(PAGE_SIZE)));
 IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE)));
 static char cfgbuf[2048];
 
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index b88e055..c977a52 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -17,11 +17,6 @@
 typedef unsigned short     u16;
 typedef unsigned int       u32;
 typedef unsigned long long u64;
-typedef unsigned long      ulong;
-typedef unsigned char      __u8;
-typedef unsigned short     __u16;
-typedef unsigned int       __u32;
-typedef unsigned long long __u64;
 
 #define true 1
 #define false 0
@@ -55,7 +50,6 @@
 /* main.c */
 void write_subsystem_identification(void);
 void write_iplb_location(void);
-extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 unsigned int get_loadparm_index(void);
 void main(void);
 
@@ -67,11 +61,11 @@
 int sclp_read(char *str, size_t count);
 
 /* virtio.c */
-unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-                                 ulong subchan_id, void *load_addr);
+unsigned long virtio_load_direct(unsigned long rec_list1, unsigned long rec_list2,
+                                 unsigned long subchan_id, void *load_addr);
 bool virtio_is_supported(SubChannelId schid);
 int virtio_blk_setup_device(SubChannelId schid);
-int virtio_read(ulong sector, void *load_addr);
+int virtio_read(unsigned long sector, void *load_addr);
 
 /* bootmap.c */
 void zipl_load(void);
diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S
index 6072906..061b065 100644
--- a/pc-bios/s390-ccw/start.S
+++ b/pc-bios/s390-ccw/start.S
@@ -10,49 +10,52 @@
  * directory.
  */
 
-        .globl _start
+#define STACK_SIZE        0x8000
+#define STACK_FRAME_SIZE  160
+
+    .globl _start
 _start:
 
-	larl   %r15, stack + 0x8000	/* Set up stack */
+    larl    %r15,stack + STACK_SIZE - STACK_FRAME_SIZE   /* Set up stack */
 
-	/* clear bss */
-	larl %r2, __bss_start
-	larl %r3, _end
-	slgr %r3, %r2		/* get sizeof bss */
-	ltgr	%r3,%r3 	/* bss empty? */
-	jz	done
-	aghi	%r3,-1
-	srlg	%r4,%r3,8	/* how many 256 byte chunks? */
-	ltgr	%r4,%r4
-	lgr	%r1,%r2
-	jz	remainder
+    /* clear bss */
+    larl    %r2,bss_start_literal   /* __bss_start might be unaligned ... */
+    lg      %r2,0(%r2)              /* ... so load it indirectly */
+    larl    %r3,_end
+    slgr    %r3,%r2    /* get sizeof bss */
+    ltgr    %r3,%r3    /* bss empty? */
+    jz      done
+    aghi    %r3,-1
+    srlg    %r4,%r3,8  /* how many 256 byte chunks? */
+    ltgr    %r4,%r4
+    lgr     %r1,%r2
+    jz      remainder
 loop:
-	xc	0(256,%r1),0(%r1)
-	la	%r1,256(%r1)
-	brctg	%r4,loop
+    xc      0(256,%r1),0(%r1)
+    la      %r1,256(%r1)
+    brctg   %r4,loop
 remainder:
-	larl	%r2,memsetxc
-	ex	%r3,0(%r2)
+    larl    %r2,memsetxc
+    ex      %r3,0(%r2)
 done:
-        /* set up a pgm exception disabled wait psw */
-        larl	%r2, disabled_wait_psw
-        mvc	0x01d0(16), 0(%r2)
-        j      main		/* And call C */
+    /* set up a pgm exception disabled wait psw */
+    larl    %r2,disabled_wait_psw
+    mvc     0x01d0(16),0(%r2)
+    j       main       /* And call C */
 
 memsetxc:
-	xc	0(1,%r1),0(%r1)
-
+    xc      0(1,%r1),0(%r1)
 
 /*
  * void disabled_wait(void)
  *
  * stops the current guest cpu.
  */
-	.globl disabled_wait
+    .globl disabled_wait
 disabled_wait:
-	larl	%r1,disabled_wait_psw
-	lpswe	0(%r1)
-1:	j	1b
+    larl    %r1,disabled_wait_psw
+    lpswe   0(%r1)
+1:  j       1b
 
 
 /*
@@ -60,61 +63,69 @@
  *
  * eats one sclp interrupt
  */
-        .globl consume_sclp_int
+    .globl consume_sclp_int
 consume_sclp_int:
-        /* enable service interrupts in cr0 */
-        stctg   %c0,%c0,0(%r15)
-        oi      6(%r15),0x2
-        lctlg   %c0,%c0,0(%r15)
-        /* prepare external call handler */
-        larl %r1, external_new_code
-        stg %r1, 0x1b8
-        larl %r1, external_new_mask
-        mvc 0x1b0(8),0(%r1)
-        /* load enabled wait PSW */
-        larl %r1, enabled_wait_psw
-        lpswe 0(%r1)
+    /* enable service interrupts in cr0 */
+    stctg   %c0,%c0,0(%r15)
+    oi      6(%r15),0x2
+    lctlg   %c0,%c0,0(%r15)
+    /* prepare external call handler */
+    larl    %r1,external_new_code
+    stg     %r1,0x1b8
+    larl    %r1,external_new_mask
+    mvc     0x1b0(8),0(%r1)
+    /* load enabled wait PSW */
+    larl    %r1,enabled_wait_psw
+    lpswe   0(%r1)
 
 /*
  * void consume_io_int(void)
  *
  * eats one I/O interrupt
  */
-        .globl consume_io_int
+    .globl consume_io_int
 consume_io_int:
-        /* enable I/O interrupts in cr6 */
-        stctg %c6,%c6,0(%r15)
-        oi    4(%r15), 0xff
-        lctlg %c6,%c6,0(%r15)
-        /* prepare i/o call handler */
-        larl  %r1, io_new_code
-        stg   %r1, 0x1f8
-        larl  %r1, io_new_mask
-        mvc   0x1f0(8),0(%r1)
-        /* load enabled wait PSW */
-        larl  %r1, enabled_wait_psw
-        lpswe 0(%r1)
+    /* enable I/O interrupts in cr6 */
+    stctg   %c6,%c6,0(%r15)
+    oi      4(%r15), 0xff
+    lctlg   %c6,%c6,0(%r15)
+    /* prepare i/o call handler */
+    larl    %r1,io_new_code
+    stg     %r1,0x1f8
+    larl    %r1,io_new_mask
+    mvc     0x1f0(8),0(%r1)
+    /* load enabled wait PSW */
+    larl    %r1,enabled_wait_psw
+    lpswe   0(%r1)
 
 external_new_code:
-        /* disable service interrupts in cr0 */
-        stctg   %c0,%c0,0(%r15)
-        ni      6(%r15),0xfd
-        lctlg   %c0,%c0,0(%r15)
-        br      %r14
+    /* disable service interrupts in cr0 */
+    stctg   %c0,%c0,0(%r15)
+    ni      6(%r15),0xfd
+    lctlg   %c0,%c0,0(%r15)
+    br      %r14
 
 io_new_code:
-        /* disable I/O interrupts in cr6 */
-        stctg %c6,%c6,0(%r15)
-        ni    4(%r15), 0x00
-        lctlg %c6,%c6,0(%r15)
-        br    %r14
+    /* disable I/O interrupts in cr6 */
+    stctg   %c6,%c6,0(%r15)
+    ni      4(%r15),0x00
+    lctlg   %c6,%c6,0(%r15)
+    br      %r14
 
-        .align  8
+    .align  8
+bss_start_literal:
+    .quad   __bss_start
 disabled_wait_psw:
-        .quad   0x0002000180000000,0x0000000000000000
+    .quad   0x0002000180000000,0x0000000000000000
 enabled_wait_psw:
-        .quad   0x0302000180000000,0x0000000000000000
+    .quad   0x0302000180000000,0x0000000000000000
 external_new_mask:
-        .quad   0x0000000180000000
+    .quad   0x0000000180000000
 io_new_mask:
-        .quad   0x0000000180000000
+    .quad   0x0000000180000000
+
+.bss
+    .align  8
+stack:
+    .space  STACK_SIZE
+    .size   stack,STACK_SIZE
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index 794f99b..a81207b 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -16,7 +16,7 @@
 #define VIRTIO_BLK_F_GEOMETRY   (1 << 4)
 #define VIRTIO_BLK_F_BLK_SIZE   (1 << 6)
 
-static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr,
+static int virtio_blk_read_many(VDev *vdev, unsigned long sector, void *load_addr,
                                 int sec_num)
 {
     VirtioBlkOuthdr out_hdr;
@@ -49,7 +49,7 @@
     return status;
 }
 
-int virtio_read_many(ulong sector, void *load_addr, int sec_num)
+int virtio_read_many(unsigned long sector, void *load_addr, int sec_num)
 {
     VDev *vdev = virtio_get_device();
 
@@ -63,14 +63,14 @@
     return -1;
 }
 
-unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-                                 ulong subchan_id, void *load_addr)
+unsigned long virtio_load_direct(unsigned long rec_list1, unsigned long rec_list2,
+                                 unsigned long subchan_id, void *load_addr)
 {
     u8 status;
     int sec = rec_list1;
     int sec_num = ((rec_list2 >> 32) & 0xffff) + 1;
     int sec_len = rec_list2 >> 48;
-    ulong addr = (ulong)load_addr;
+    unsigned long addr = (unsigned long)load_addr;
 
     if (sec_len != virtio_get_block_size()) {
         return -1;
@@ -86,7 +86,7 @@
     return addr;
 }
 
-int virtio_read(ulong sector, void *load_addr)
+int virtio_read(unsigned long sector, void *load_addr)
 {
     return virtio_read_many(sector, load_addr, 1);
 }
diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c
index dcce696..d1a84b9 100644
--- a/pc-bios/s390-ccw/virtio-scsi.c
+++ b/pc-bios/s390-ccw/virtio-scsi.c
@@ -150,7 +150,7 @@
 }
 
 static bool scsi_read_10(VDev *vdev,
-                         ulong sector, int sectors, void *data,
+                         unsigned long sector, int sectors, void *data,
                          unsigned int data_size)
 {
     ScsiCdbRead10 cdb = {
@@ -269,7 +269,7 @@
 }
 
 int virtio_scsi_read_many(VDev *vdev,
-                          ulong sector, void *load_addr, int sec_num)
+                          unsigned long sector, void *load_addr, int sec_num)
 {
     int sector_count;
     int f = vdev->blk_factor;
diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h
index e6b6cd4..c5612e1 100644
--- a/pc-bios/s390-ccw/virtio-scsi.h
+++ b/pc-bios/s390-ccw/virtio-scsi.h
@@ -68,7 +68,7 @@
 }
 
 int virtio_scsi_read_many(VDev *vdev,
-                          ulong sector, void *load_addr, int sec_num);
+                          unsigned long sector, void *load_addr, int sec_num);
 int virtio_scsi_setup_device(SubChannelId schid);
 
 #endif /* VIRTIO_SCSI_H */
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index f37510f..5edd058 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -48,10 +48,10 @@
 static long kvm_hypercall(unsigned long nr, unsigned long param1,
                           unsigned long param2, unsigned long param3)
 {
-    register ulong r_nr asm("1") = nr;
-    register ulong r_param1 asm("2") = param1;
-    register ulong r_param2 asm("3") = param2;
-    register ulong r_param3 asm("4") = param3;
+    register unsigned long r_nr asm("1") = nr;
+    register unsigned long r_param1 asm("2") = param1;
+    register unsigned long r_param2 asm("3") = param2;
+    register unsigned long r_param3 asm("4") = param3;
     register long retval asm("2");
 
     asm volatile ("diag %%r2,%%r4,0x500"
@@ -145,7 +145,7 @@
         vr->avail->ring[vr->avail->idx % vr->num] = vr->next_idx;
     }
 
-    vr->desc[vr->next_idx].addr = (ulong)p;
+    vr->desc[vr->next_idx].addr = (unsigned long)p;
     vr->desc[vr->next_idx].len = len;
     vr->desc[vr->next_idx].flags = flags & ~VRING_HIDDEN_IS_CHAIN;
     vr->desc[vr->next_idx].next = vr->next_idx;
@@ -182,7 +182,7 @@
  */
 int vring_wait_reply(void)
 {
-    ulong target_second = get_time_seconds() + vdev.wait_reply_timeout;
+    unsigned long target_second = get_time_seconds() + vdev.wait_reply_timeout;
 
     /* Wait for any queue to be updated by the host */
     do {
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index e657d38..85bd9d1 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -190,14 +190,14 @@
 uint8_t virtio_get_heads(void);
 uint8_t virtio_get_sectors(void);
 uint64_t virtio_get_blocks(void);
-int virtio_read_many(ulong sector, void *load_addr, int sec_num);
+int virtio_read_many(unsigned long sector, void *load_addr, int sec_num);
 
 #define VIRTIO_SECTOR_SIZE 512
 #define VIRTIO_ISO_BLOCK_SIZE 2048
 #define VIRTIO_SCSI_BLOCK_SIZE 512
 #define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096
 
-static inline ulong virtio_sector_adjust(ulong sector)
+static inline unsigned long virtio_sector_adjust(unsigned long sector)
 {
     return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE);
 }
diff --git a/pc-bios/s390-netboot.img b/pc-bios/s390-netboot.img
index 682da24..6908e49 100644
--- a/pc-bios/s390-netboot.img
+++ b/pc-bios/s390-netboot.img
Binary files differ
diff --git a/tests/tcg/s390x/head64.S b/tests/tcg/s390x/head64.S
index c6f36df..4fe2883 100644
--- a/tests/tcg/s390x/head64.S
+++ b/tests/tcg/s390x/head64.S
@@ -8,6 +8,8 @@
 #include "../../../pc-bios/s390-ccw/start.S"
 #undef main
 
+.text
+
 main_pre:
     aghi %r15,-160                     /* reserve stack for C code */
     brasl %r14,sclp_setup
@@ -24,8 +26,3 @@
     .quad 0x2000180000000,0xfff        /* see is_special_wait_psw() */
 failure_psw:
     .quad 0x2000180000000,0            /* disabled wait */
-
-    .section .bss
-    .align 0x1000
-stack:
-    .skip 0x8000