optionrom: add a DMA-enabled multiboot ROM

Add a new option rom for the multiboot loader, using DMA transfers to copy
data instead of "rep insb".

This significantly lowers QEMU's startup latency by a factor of about 40,
for example, going from 30sec to 0.8sec when loading modules of 120MB
in size.

Signed-off-by: Marcus Hähnel <marcus.haehnel@kernkonzept.com>
Signed-off-by: Adam Lackorzynski <adam@l4re.org>
[Modified to keep the non-DMA code depending on #ifdef USE_FW_CFG_DMA;
 do not write below stack. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index a44c9bc..b40ff3f 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -63,6 +63,7 @@
   'petalogix-s3adsp1800.dtb',
   'petalogix-ml605.dtb',
   'multiboot.bin',
+  'multiboot_dma.bin',
   'linuxboot.bin',
   'linuxboot_dma.bin',
   'kvmvapic.bin',
diff --git a/pc-bios/multiboot_dma.bin b/pc-bios/multiboot_dma.bin
new file mode 100644
index 0000000..c0e2c31
--- /dev/null
+++ b/pc-bios/multiboot_dma.bin
Binary files differ
diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
index 3482508..5d55d25 100644
--- a/pc-bios/optionrom/Makefile
+++ b/pc-bios/optionrom/Makefile
@@ -2,7 +2,7 @@
 SRC_DIR := $(TOPSRC_DIR)/pc-bios/optionrom
 VPATH = $(SRC_DIR)
 
-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin pvh.bin
+all: multiboot.bin multiboot_dma.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin pvh.bin
 # Dummy command so that make thinks it has done something
 	@true
 
@@ -41,8 +41,6 @@
 LD_I386_EMULATION ?= elf_i386
 override LDFLAGS = -m $(LD_I386_EMULATION) -T $(SRC_DIR)/flat.lds
 
-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin pvh.bin
-
 pvh.img: pvh.o pvh_main.o
 
 %.o: %.S
diff --git a/pc-bios/optionrom/multiboot.S b/pc-bios/optionrom/multiboot.S
index b7efe4d..181a4b0 100644
--- a/pc-bios/optionrom/multiboot.S
+++ b/pc-bios/optionrom/multiboot.S
@@ -68,7 +68,7 @@
 	mov		%eax, %es
 
 	/* Read the bootinfo struct into RAM */
-	read_fw_blob(FW_CFG_INITRD)
+	read_fw_blob_dma(FW_CFG_INITRD)
 
 	/* FS = bootinfo_struct */
 	read_fw		FW_CFG_INITRD_ADDR
@@ -188,7 +188,7 @@
 	movl		%eax, %gs
 
 	/* Read the kernel and modules into RAM */
-	read_fw_blob(FW_CFG_KERNEL)
+	read_fw_blob_dma(FW_CFG_KERNEL)
 
 	/* Jump off to the kernel */
 	read_fw		FW_CFG_KERNEL_ENTRY
diff --git a/pc-bios/optionrom/multiboot_dma.S b/pc-bios/optionrom/multiboot_dma.S
new file mode 100644
index 0000000..d809af3
--- /dev/null
+++ b/pc-bios/optionrom/multiboot_dma.S
@@ -0,0 +1,2 @@
+#define USE_FW_CFG_DMA 1
+#include "multiboot.S"
diff --git a/pc-bios/optionrom/optionrom.h b/pc-bios/optionrom/optionrom.h
index a2b612f..8d74c0d 100644
--- a/pc-bios/optionrom/optionrom.h
+++ b/pc-bios/optionrom/optionrom.h
@@ -37,6 +37,17 @@
 #define BIOS_CFG_IOPORT_CFG	0x510
 #define BIOS_CFG_IOPORT_DATA	0x511
 
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ    0x02
+#define FW_CFG_DMA_CTL_SKIP    0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
+#define FW_CFG_DMA_SIGNATURE 0x51454d5520434647ULL /* "QEMU CFG" */
+
+#define BIOS_CFG_DMA_ADDR_HIGH  0x514
+#define BIOS_CFG_DMA_ADDR_LOW   0x518
+
 /* Break the translation block flow so -d cpu shows us values */
 #define DEBUG_HERE \
 	jmp		1f;				\
@@ -62,6 +73,61 @@
 	bswap		%eax
 .endm
 
+
+/*
+ * Read data from the fw_cfg device using DMA.
+ * Clobbers:	%edx, %eax, ADDR, SIZE, memory[%esp-16] to memory[%esp]
+ */
+.macro read_fw_dma VAR, SIZE, ADDR
+        /* Address */
+	bswapl		\ADDR
+	pushl		\ADDR
+
+	/* We only support 32 bit target addresses */
+	xorl		%eax, %eax
+	pushl		%eax
+	mov		$BIOS_CFG_DMA_ADDR_HIGH, %dx
+	outl		%eax, (%dx)
+
+	/* Size */
+	bswapl		\SIZE
+	pushl		\SIZE
+
+        /* Control */
+	movl		$(\VAR << 16) | (FW_CFG_DMA_CTL_READ | FW_CFG_DMA_CTL_SELECT), %eax
+	bswapl		%eax
+	pushl		%eax
+
+	movl		%esp, %eax /* Address of the struct we generated */
+	bswapl		%eax
+	mov		$BIOS_CFG_DMA_ADDR_LOW, %dx
+	outl		%eax, (%dx) /* Initiate DMA */
+
+1:  mov		(%esp), %eax /* Wait for completion */
+	bswapl		%eax
+	testl		$~FW_CFG_DMA_CTL_ERROR, %eax
+	jnz		1b
+       addl            $16, %esp
+.endm
+
+
+/*
+ * Read a blob from the fw_cfg device using DMA
+ * Requires _ADDR, _SIZE and _DATA values for the parameter.
+ *
+ * Clobbers:	%eax, %edx, %es, %ecx, %edi and adresses %esp-20 to %esp
+ */
+#ifdef USE_FW_CFG_DMA
+#define read_fw_blob_dma(var) \
+	read_fw		var ## _SIZE; \
+	mov		%eax, %ecx; \
+	read_fw		var ## _ADDR; \
+	mov		%eax, %edi ;\
+	read_fw_dma	var ## _DATA, %ecx, %edi
+#else
+#define read_fw_blob_dma(var) read_fw_blob(var)
+#endif
+
 #define read_fw_blob_pre(var)				\
 	read_fw		var ## _SIZE;			\
 	mov		%eax, %ecx;			\