mmap emulation

Add linux-user/mmap.c with target_mmap(), target_mprotect(),
target_munmap(), target_mremap() and target_msync().  These wrappers
track per-page protections through page_set_flags()/page_get_flags()
and handle the case where the target page size is smaller than the
host page size by mapping or reading partial host pages as needed.
elfload.c drops the X86_*/ALPHA_* page macros in favour of the generic
TARGET_PAGE_*/TARGET_ELF_* ones and calls the new wrappers instead of
mmap4k(), syscall.c routes the guest mmap family through them, and
main.c gains a '-p pagesize' debug option and initializes the CPU
before elf_exec() so that host_page_size is known in time.

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@158 c046a42c-6fe2-441c-8c8c-71466251a162
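
Not part of the patch itself: a minimal sketch of the intended call
pattern for the new wrappers, mirroring the setup_arg_pages() change
in elfload.c below.  The helper name alloc_guest_stack() is made up
for illustration, error handling is simplified, and it assumes the
file is built inside linux-user/ so that "qemu.h" provides the
prototypes and host_page_size (elfload.c relies on the same symbols
in this patch).

/*
 * Sketch only: reserve a guest stack plus one guard page using the
 * target_* wrappers instead of raw mmap()/mprotect().  The wrappers
 * take guest addresses as unsigned long, return -1 on failure, and
 * keep the emulator's page flags in sync via page_set_flags().
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#include "qemu.h"

static unsigned long alloc_guest_stack(unsigned long size)
{
    unsigned long base;

    /* one extra host page on top, turned into a guard page below */
    base = target_mmap(0, size + host_page_size,
                       PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == -1) {
        perror("stack mmap");
        exit(1);
    }
    /* guard page: no access allowed above the stack */
    target_mprotect(base + size, host_page_size, PROT_NONE);
    return base;
}
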
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index edb3176..5186e55 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -95,8 +95,6 @@
 #define ZMAGIC 0413
 #define QMAGIC 0314
 
-#define X86_STACK_TOP 0x7d000000
-
 /* max code+data+bss space allocated to elf interpreter */
 #define INTERP_MAP_SIZE (32 * 1024 * 1024)
 
@@ -123,23 +121,11 @@
 #define PER_XENIX		(0x0007 | STICKY_TIMEOUTS)
 
 /* Necessary parameters */
-#define	ALPHA_PAGE_SIZE 4096
-#define	X86_PAGE_SIZE 4096
-
-#define ALPHA_PAGE_MASK (~(ALPHA_PAGE_SIZE-1))
-#define X86_PAGE_MASK (~(X86_PAGE_SIZE-1))
-
-#define ALPHA_PAGE_ALIGN(addr) ((((addr)+ALPHA_PAGE_SIZE)-1)&ALPHA_PAGE_MASK)
-#define X86_PAGE_ALIGN(addr) ((((addr)+X86_PAGE_SIZE)-1)&X86_PAGE_MASK)
-
 #define NGROUPS 32
 
-#define X86_ELF_EXEC_PAGESIZE X86_PAGE_SIZE
-#define X86_ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(X86_ELF_EXEC_PAGESIZE-1))
-#define X86_ELF_PAGEOFFSET(_v) ((_v) & (X86_ELF_EXEC_PAGESIZE-1))
-
-#define ALPHA_ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ALPHA_PAGE_SIZE-1))
-#define ALPHA_ELF_PAGEOFFSET(_v) ((_v) & (ALPHA_PAGE_SIZE-1))
+#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE
+#define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE-1))
+#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
 
 #define INTERPRETER_NONE 0
 #define INTERPRETER_AOUT 1
@@ -160,9 +146,6 @@
 	memcpy(to, from, n);
 }
 
-//extern void * mmap4k();
-#define mmap4k(a, b, c, d, e, f) mmap((void *)(a), b, c, d, e, f)
-
 extern unsigned long x86_stack_size;
 
 static int load_aout_interp(void * exptr, int interp_fd);
@@ -227,8 +210,8 @@
     /* User-space version of kernel get_free_page.  Returns a page-aligned
      * page-sized chunk of memory.
      */
-    retval = mmap4k(0, ALPHA_PAGE_SIZE, PROT_READ|PROT_WRITE, 
-			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    retval = (void *)target_mmap(0, host_page_size, PROT_READ|PROT_WRITE, 
+                                 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 
     if((long)retval == -1) {
 	perror("get_free_page");
@@ -241,7 +224,7 @@
 
 static void free_page(void * pageaddr)
 {
-    (void)munmap(pageaddr, ALPHA_PAGE_SIZE);
+    target_munmap((unsigned long)pageaddr, host_page_size);
 }
 
 /*
@@ -272,9 +255,9 @@
 	while (len) {
 	    --p; --tmp; --len;
 	    if (--offset < 0) {
-		offset = p % X86_PAGE_SIZE;
-		if (!(pag = (char *) page[p/X86_PAGE_SIZE]) &&
-		    !(pag = (char *) page[p/X86_PAGE_SIZE] =
+		offset = p % TARGET_PAGE_SIZE;
+		if (!(pag = (char *) page[p/TARGET_PAGE_SIZE]) &&
+		    !(pag = (char *) page[p/TARGET_PAGE_SIZE] =
 		      (unsigned long *) get_free_page())) {
 			return 0;
 		}
@@ -390,21 +373,21 @@
      * it for args, we'll use it for something else...
      */
     size = x86_stack_size;
-    if (size < MAX_ARG_PAGES*X86_PAGE_SIZE)
-        size = MAX_ARG_PAGES*X86_PAGE_SIZE;
-    error = (unsigned long)mmap4k(NULL, 
-                                  size + X86_PAGE_SIZE,
-                                  PROT_READ | PROT_WRITE,
-                                  MAP_PRIVATE | MAP_ANONYMOUS,
-                                  -1, 0);
+    if (size < MAX_ARG_PAGES*TARGET_PAGE_SIZE)
+        size = MAX_ARG_PAGES*TARGET_PAGE_SIZE;
+    error = target_mmap(0, 
+                        size + host_page_size,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS,
+                        -1, 0);
     if (error == -1) {
         perror("stk mmap");
         exit(-1);
     }
     /* we reserve one extra page at the top of the stack as guard */
-    mprotect((void *)(error + size), X86_PAGE_SIZE, PROT_NONE);
+    target_mprotect(error + size, host_page_size, PROT_NONE);
 
-    stack_base = error + size - MAX_ARG_PAGES*X86_PAGE_SIZE;
+    stack_base = error + size - MAX_ARG_PAGES*TARGET_PAGE_SIZE;
     p += stack_base;
 
     if (bprm->loader) {
@@ -416,10 +399,10 @@
 	if (bprm->page[i]) {
 	    info->rss++;
 
-	    memcpy((void *)stack_base, (void *)bprm->page[i], X86_PAGE_SIZE);
+	    memcpy((void *)stack_base, (void *)bprm->page[i], TARGET_PAGE_SIZE);
 	    free_page((void *)bprm->page[i]);
 	}
-	stack_base += X86_PAGE_SIZE;
+	stack_base += TARGET_PAGE_SIZE;
     }
     return p;
 }
@@ -427,13 +410,13 @@
 static void set_brk(unsigned long start, unsigned long end)
 {
 	/* page-align the start and end addresses... */
-        start = ALPHA_PAGE_ALIGN(start);
-        end = ALPHA_PAGE_ALIGN(end);
+        start = HOST_PAGE_ALIGN(start);
+        end = HOST_PAGE_ALIGN(end);
         if (end <= start)
                 return;
-        if((long)mmap4k(start, end - start,
-                PROT_READ | PROT_WRITE | PROT_EXEC,
-                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == -1) {
+        if(target_mmap(start, end - start,
+                       PROT_READ | PROT_WRITE | PROT_EXEC,
+                       MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == -1) {
 	    perror("cannot mmap brk");
 	    exit(-1);
 	}
@@ -451,9 +434,9 @@
         unsigned long nbyte;
         char * fpnt;
 
-        nbyte = elf_bss & (ALPHA_PAGE_SIZE-1);	/* was X86_PAGE_SIZE - JRP */
+        nbyte = elf_bss & (host_page_size-1);	/* was TARGET_PAGE_SIZE - JRP */
         if (nbyte) {
-	    nbyte = ALPHA_PAGE_SIZE - nbyte;
+	    nbyte = host_page_size - nbyte;
 	    fpnt = (char *) elf_bss;
 	    do {
 		*fpnt++ = 0;
@@ -494,7 +477,7 @@
           NEW_AUX_ENT (AT_PHDR, (target_ulong)(load_addr + exec->e_phoff));
           NEW_AUX_ENT (AT_PHENT, (target_ulong)(sizeof (struct elf_phdr)));
           NEW_AUX_ENT (AT_PHNUM, (target_ulong)(exec->e_phnum));
-          NEW_AUX_ENT (AT_PAGESZ, (target_ulong)(ALPHA_PAGE_SIZE));
+          NEW_AUX_ENT (AT_PAGESZ, (target_ulong)(TARGET_PAGE_SIZE));
           NEW_AUX_ENT (AT_BASE, (target_ulong)(interp_load_addr));
           NEW_AUX_ENT (AT_FLAGS, (target_ulong)0);
           NEW_AUX_ENT (AT_ENTRY, load_bias + exec->e_entry);
@@ -554,7 +537,7 @@
 
 	/* Now read in all of the header information */
 	
-	if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > X86_PAGE_SIZE)
+	if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE)
 	    return ~0UL;
 	
 	elf_phdata =  (struct elf_phdr *) 
@@ -594,9 +577,9 @@
         if (interp_elf_ex->e_type == ET_DYN) {
             /* in order to avoid harcoding the interpreter load
                address in qemu, we allocate a big enough memory zone */
-            error = (unsigned long)mmap4k(NULL, INTERP_MAP_SIZE,
-                                          PROT_NONE, MAP_PRIVATE | MAP_ANON, 
-                                          -1, 0);
+            error = target_mmap(0, INTERP_MAP_SIZE,
+                                PROT_NONE, MAP_PRIVATE | MAP_ANON, 
+                                -1, 0);
             if (error == -1) {
                 perror("mmap");
                 exit(-1);
@@ -620,12 +603,12 @@
 	    	elf_type |= MAP_FIXED;
 	    	vaddr = eppnt->p_vaddr;
 	    }
-	    error = (unsigned long)mmap4k(load_addr+X86_ELF_PAGESTART(vaddr),
-		 eppnt->p_filesz + X86_ELF_PAGEOFFSET(eppnt->p_vaddr),
+	    error = target_mmap(load_addr+TARGET_ELF_PAGESTART(vaddr),
+		 eppnt->p_filesz + TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr),
 		 elf_prot,
 		 elf_type,
 		 interpreter_fd,
-		 eppnt->p_offset - X86_ELF_PAGEOFFSET(eppnt->p_vaddr));
+		 eppnt->p_offset - TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr));
 	    
 	    if (error > -1024UL) {
 	      /* Real error */
@@ -665,13 +648,13 @@
 	 * bss page.
 	 */
 	padzero(elf_bss);
-	elf_bss = X86_ELF_PAGESTART(elf_bss + ALPHA_PAGE_SIZE - 1); /* What we have mapped so far */
+	elf_bss = TARGET_ELF_PAGESTART(elf_bss + host_page_size - 1); /* What we have mapped so far */
 
 	/* Map the last of the bss segment */
 	if (last_bss > elf_bss) {
-	  mmap4k(elf_bss, last_bss-elf_bss,
-		  PROT_READ|PROT_WRITE|PROT_EXEC,
-		  MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+            target_mmap(elf_bss, last_bss-elf_bss,
+                        PROT_READ|PROT_WRITE|PROT_EXEC,
+                        MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 	}
 	free(elf_phdata);
 
@@ -742,7 +725,7 @@
     unsigned int interpreter_type = INTERPRETER_NONE;
     unsigned char ibcs2_interpreter;
     int i;
-    void * mapped_addr;
+    unsigned long mapped_addr;
     struct elf_phdr * elf_ppnt;
     struct elf_phdr *elf_phdata;
     unsigned long elf_bss, k, elf_brk;
@@ -979,33 +962,32 @@
                is because the brk will follow the loader, and is not movable.  */
             /* NOTE: for qemu, we do a big mmap to get enough space
                without harcoding any address */
-            error = (unsigned long)mmap4k(NULL, ET_DYN_MAP_SIZE,
-                                          PROT_NONE, MAP_PRIVATE | MAP_ANON, 
-                                          -1, 0);
+            error = target_mmap(0, ET_DYN_MAP_SIZE,
+                                PROT_NONE, MAP_PRIVATE | MAP_ANON, 
+                                -1, 0);
             if (error == -1) {
                 perror("mmap");
                 exit(-1);
             }
-            load_bias = X86_ELF_PAGESTART(error - elf_ppnt->p_vaddr);
+            load_bias = TARGET_ELF_PAGESTART(error - elf_ppnt->p_vaddr);
         }
         
-        error = (unsigned long)mmap4k(
-                                      X86_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr),
-                                      (elf_ppnt->p_filesz +
-                                       X86_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)),
-                                      elf_prot,
-                                      (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE),
-                                      bprm->fd,
-                                      (elf_ppnt->p_offset - 
-                                       X86_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)));
+        error = target_mmap(TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr),
+                            (elf_ppnt->p_filesz +
+                             TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)),
+                            elf_prot,
+                            (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE),
+                            bprm->fd,
+                            (elf_ppnt->p_offset - 
+                             TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)));
         if (error == -1) {
             perror("mmap");
             exit(-1);
         }
 
 #ifdef LOW_ELF_STACK
-        if (X86_ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack)
-            elf_stack = X86_ELF_PAGESTART(elf_ppnt->p_vaddr);
+        if (TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack)
+            elf_stack = TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr);
 #endif
         
         if (!load_addr_set) {
@@ -1013,7 +995,7 @@
             load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset;
             if (elf_ex.e_type == ET_DYN) {
                 load_bias += error -
-                    X86_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr);
+                    TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr);
                 load_addr += load_bias;
             }
         }
@@ -1108,8 +1090,8 @@
 	       and some applications "depend" upon this behavior.
 	       Since we do not have the power to recompile these, we
 	       emulate the SVr4 behavior.  Sigh.  */
-	    mapped_addr = mmap4k(NULL, ALPHA_PAGE_SIZE, PROT_READ | PROT_EXEC,
-			    MAP_FIXED | MAP_PRIVATE, -1, 0);
+	    mapped_addr = target_mmap(0, host_page_size, PROT_READ | PROT_EXEC,
+                                      MAP_FIXED | MAP_PRIVATE, -1, 0);
     }
 
 #ifdef ELF_PLAT_INIT
@@ -1137,7 +1119,7 @@
         int retval;
         int i;
 
-        bprm.p = X86_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int);
+        bprm.p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int);
         for (i=0 ; i<MAX_ARG_PAGES ; i++)       /* clear page-table */
                 bprm.page[i] = 0;
         retval = open(filename, O_RDONLY);
diff --git a/linux-user/main.c b/linux-user/main.c
index 00dc271..a6a84e5 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -231,13 +231,16 @@
            "usage: qemu [-h] [-d] [-L path] [-s size] program [arguments...]\n"
            "Linux x86 emulator\n"
            "\n"
-           "-h        print this help\n"
-           "-d        activate log (logfile=%s)\n"
-           "-L path   set the x86 elf interpreter prefix (default=%s)\n"
-           "-s size   set the x86 stack size in bytes (default=%ld)\n",
-           DEBUG_LOGFILE,
+           "-h           print this help\n"
+           "-L path      set the x86 elf interpreter prefix (default=%s)\n"
+           "-s size      set the x86 stack size in bytes (default=%ld)\n"
+           "\n"
+           "debug options:\n"
+           "-d           activate log (logfile=%s)\n"
+           "-p pagesize  set the host page size to 'pagesize'\n",
            interp_prefix, 
-           x86_stack_size);
+           x86_stack_size,
+           DEBUG_LOGFILE);
     _exit(1);
 }
 
@@ -284,6 +287,13 @@
                 x86_stack_size *= 1024;
         } else if (!strcmp(r, "L")) {
             interp_prefix = argv[optind++];
+        } else if (!strcmp(r, "p")) {
+            host_page_size = atoi(argv[optind++]);
+            if (host_page_size == 0 ||
+                (host_page_size & (host_page_size - 1)) != 0) {
+                fprintf(stderr, "page size must be a power of two\n");
+                exit(1);
+            }
         } else {
             usage();
         }
@@ -311,12 +321,18 @@
     /* Scan interp_prefix dir for replacement files. */
     init_paths(interp_prefix);
 
+    /* NOTE: we need to init the CPU at this stage to get the
+       host_page_size */
+    env = cpu_x86_init();
+
     if (elf_exec(filename, argv+optind, environ, regs, info) != 0) {
 	printf("Error loading %s\n", filename);
 	_exit(1);
     }
     
     if (loglevel) {
+        page_dump(logfile);
+    
         fprintf(logfile, "start_brk   0x%08lx\n" , info->start_brk);
         fprintf(logfile, "end_code    0x%08lx\n" , info->end_code);
         fprintf(logfile, "start_code  0x%08lx\n" , info->start_code);
@@ -331,7 +347,6 @@
     syscall_init();
     signal_init();
 
-    env = cpu_x86_init();
     global_env = env;
 
     /* build Task State */
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
new file mode 100644
index 0000000..4c4d910
--- /dev/null
+++ b/linux-user/mmap.c
@@ -0,0 +1,370 @@
+/*
+ *  mmap support for qemu
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#include "qemu.h"
+
+//#define DEBUG_MMAP
+
+/* NOTE: all the constants are the HOST ones */
+int target_mprotect(unsigned long start, unsigned long len, int prot)
+{
+    unsigned long end, host_start, host_end, addr;
+    int prot1, ret;
+
+#ifdef DEBUG_MMAP
+    printf("mprotect: start=0x%lx len=0x%lx prot=%c%c%c\n", start, len,
+           prot & PROT_READ ? 'r' : '-',
+           prot & PROT_WRITE ? 'w' : '-',
+           prot & PROT_EXEC ? 'x' : '-');
+#endif
+
+    if ((start & ~TARGET_PAGE_MASK) != 0)
+        return -EINVAL;
+    len = TARGET_PAGE_ALIGN(len);
+    end = start + len;
+    if (end < start)
+        return -EINVAL;
+    if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
+        return -EINVAL;
+    if (len == 0)
+        return 0;
+    
+    host_start = start & host_page_mask;
+    host_end = HOST_PAGE_ALIGN(end);
+    if (start > host_start) {
+        /* handle host page containing start */
+        prot1 = prot;
+        for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
+            prot1 |= page_get_flags(addr);
+        }
+        ret = mprotect((void *)host_start, host_page_size, prot1 & PAGE_BITS);
+        if (ret != 0)
+            return ret;
+        host_start += host_page_size;
+    }
+    if (end < host_end) {
+        /* handle host page containing end (can be the same as first page) */
+        prot1 = prot;
+        for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
+            prot1 |= page_get_flags(addr);
+        }
+        ret = mprotect((void *)(host_end - host_page_size), host_page_size, 
+                       prot1 & PAGE_BITS);
+        if (ret != 0)
+            return ret;
+        host_end -= host_page_size;
+    }
+    
+    /* handle the pages in the middle */
+    if (host_start < host_end) {
+        ret = mprotect((void *)host_start, host_end - host_start, prot);
+        if (ret != 0)
+            return ret;
+    }
+
+    page_set_flags(start, start + len, prot | PAGE_VALID);
+    return 0;
+}
+
+/* map an incomplete host page */
+int mmap_frag(unsigned long host_start, 
+               unsigned long start, unsigned long end, 
+               int prot, int flags, int fd, unsigned long offset)
+{
+    unsigned long host_end, ret, addr;
+    int prot1, prot_new;
+
+    host_end = host_start + host_page_size;
+
+    /* get the protection of the target pages outside the mapping */
+    prot1 = 0;
+    for(addr = host_start; addr < host_end; addr++) {
+        if (addr < start || addr >= end)
+            prot1 |= page_get_flags(addr);
+    }
+    
+    if (prot1 == 0) {
+        /* no page was there, so we allocate one */
+        ret = (long)mmap((void *)host_start, host_page_size, prot, 
+                         flags | MAP_ANONYMOUS, -1, 0);
+        if (ret == -1)
+            return ret;
+    }
+    prot1 &= PAGE_BITS;
+
+    prot_new = prot | prot1;
+    if (!(flags & MAP_ANONYMOUS)) {
+        /* msync() won't work here, so we return an error if write is
+           possible while it is a shared mapping */
+        if ((flags & MAP_TYPE) == MAP_SHARED &&
+            (prot & PROT_WRITE))
+            return -EINVAL;
+
+        /* adjust protection to be able to read */
+        if (!(prot1 & PROT_WRITE))
+            mprotect((void *)host_start, host_page_size, prot1 | PROT_WRITE);
+        
+        /* read the corresponding file data */
+        pread(fd, (void *)start, end - start, offset);
+        
+        /* put final protection */
+        if (prot_new != (prot1 | PROT_WRITE))
+            mprotect((void *)host_start, host_page_size, prot_new);
+    } else {
+        /* just update the protection */
+        if (prot_new != prot1) {
+            mprotect((void *)host_start, host_page_size, prot_new);
+        }
+    }
+    return 0;
+}
+
+/* NOTE: all the constants are the HOST ones */
+long target_mmap(unsigned long start, unsigned long len, int prot, 
+                 int flags, int fd, unsigned long offset)
+{
+    unsigned long ret, end, host_start, host_end, retaddr, host_offset, host_len;
+
+#ifdef DEBUG_MMAP
+    {
+        printf("mmap: start=0x%lx len=0x%lx prot=%c%c%c flags=",
+               start, len, 
+               prot & PROT_READ ? 'r' : '-',
+               prot & PROT_WRITE ? 'w' : '-',
+               prot & PROT_EXEC ? 'x' : '-');
+        if (flags & MAP_FIXED)
+            printf("MAP_FIXED ");
+        if (flags & MAP_ANONYMOUS)
+            printf("MAP_ANON ");
+        switch(flags & MAP_TYPE) {
+        case MAP_PRIVATE:
+            printf("MAP_PRIVATE ");
+            break;
+        case MAP_SHARED:
+            printf("MAP_SHARED ");
+            break;
+        default:
+            printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
+            break;
+        }
+        printf("fd=%d offset=%lx\n", fd, offset);
+    }
+#endif
+
+    if (offset & ~TARGET_PAGE_MASK)
+        return -EINVAL;
+
+    len = TARGET_PAGE_ALIGN(len);
+    if (len == 0)
+        return start;
+    host_start = start & host_page_mask;
+
+    if (!(flags & MAP_FIXED)) {
+        if (host_page_size != real_host_page_size) {
+            /* NOTE: this code is only for debugging with '-p' option */
+            /* reserve a memory area */
+            host_len = HOST_PAGE_ALIGN(len) + host_page_size - TARGET_PAGE_SIZE;
+            host_start = (long)mmap((void *)host_start, host_len, PROT_NONE, 
+                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            if (host_start == -1)
+                return host_start;
+            host_end = host_start + host_len;
+            start = HOST_PAGE_ALIGN(host_start);
+            end = start + HOST_PAGE_ALIGN(len);
+            if (start > host_start)
+                munmap((void *)host_start, start - host_start);
+            if (end < host_end)
+                munmap((void *)end, host_end - end);
+            /* use it as a fixed mapping */
+            flags |= MAP_FIXED;
+        } else {
+            /* if not fixed, no need to do anything */
+            host_offset = offset & host_page_mask;
+            host_len = len + offset - host_offset;
+            start = (long)mmap((void *)host_start, host_len, 
+                               prot, flags, fd, host_offset);
+            if (start == -1)
+                return start;
+            /* update start so that it points to the file position at 'offset' */
+            if (!(flags & MAP_ANONYMOUS)) 
+                start += offset - host_offset;
+            goto the_end1;
+        }
+    }
+    
+    if (start & ~TARGET_PAGE_MASK)
+        return -EINVAL;
+    end = start + len;
+    host_end = HOST_PAGE_ALIGN(end);
+
+    /* worst case: we cannot map the file because the offset is not
+       aligned, so we read it */
+    if (!(flags & MAP_ANONYMOUS) &&
+        (offset & ~host_page_mask) != (start & ~host_page_mask)) {
+        /* msync() won't work here, so we return an error if write is
+           possible while it is a shared mapping */
+        if ((flags & MAP_TYPE) == MAP_SHARED &&
+            (prot & PROT_WRITE))
+            return -EINVAL;
+        retaddr = target_mmap(start, len, prot | PROT_WRITE, 
+                              MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, 
+                              -1, 0);
+        if (retaddr == -1)
+            return retaddr;
+        pread(fd, (void *)start, len, offset);
+        if (!(prot & PROT_WRITE)) {
+            ret = target_mprotect(start, len, prot);
+            if (ret != 0)
+                return ret;
+        }
+        goto the_end;
+    }
+
+    /* handle the start of the mapping */
+    if (start > host_start) {
+        if (host_end == host_start + host_page_size) {
+            /* one single host page */
+            ret = mmap_frag(host_start, start, end,
+                            prot, flags, fd, offset);
+            if (ret == -1)
+                return ret;
+            goto the_end1;
+        }
+        ret = mmap_frag(host_start, start, host_start + host_page_size,
+                        prot, flags, fd, offset);
+        if (ret == -1)
+            return ret;
+        host_start += host_page_size;
+    }
+    /* handle the end of the mapping */
+    if (end < host_end) {
+        ret = mmap_frag(host_end - host_page_size, 
+                        host_end - host_page_size, host_end,
+                        prot, flags, fd, 
+                        offset + host_end - host_page_size - start);
+        if (ret == -1)
+            return ret;
+        host_end -= host_page_size;
+    }
+    
+    /* map the middle (easier) */
+    if (host_start < host_end) {
+        ret = (long)mmap((void *)host_start, host_end - host_start, 
+                         prot, flags, fd, offset + host_start - start);
+        if (ret == -1)
+            return ret;
+    }
+ the_end1:
+    page_set_flags(start, start + len, prot | PAGE_VALID);
+ the_end:
+#ifdef DEBUG_MMAP
+    page_dump(stdout);
+    printf("\n");
+#endif
+    return start;
+}
+
+int target_munmap(unsigned long start, unsigned long len)
+{
+    unsigned long end, host_start, host_end, addr;
+    int prot, ret;
+
+#ifdef DEBUG_MMAP
+    printf("munmap: start=0x%lx len=0x%lx\n", start, len);
+#endif
+    if (start & ~TARGET_PAGE_MASK)
+        return -EINVAL;
+    len = TARGET_PAGE_ALIGN(len);
+    if (len == 0)
+        return -EINVAL;
+    end = start + len;
+    host_start = start & host_page_mask;
+    host_end = HOST_PAGE_ALIGN(end);
+
+    if (start > host_start) {
+        /* handle host page containing start */
+        prot = 0;
+        for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
+            prot |= page_get_flags(addr);
+        }
+        if (prot != 0)
+            host_start += host_page_size;
+    }
+    if (end < host_end) {
+        /* handle host page containing end (can be the same as first page) */
+        prot = 0;
+        for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
+            prot |= page_get_flags(addr);
+        }
+        if (prot != 0)
+            host_end -= host_page_size;
+    }
+    
+    /* unmap what we can */
+    if (host_start < host_end) {
+        ret = munmap((void *)host_start, host_end - host_start);
+        if (ret != 0)
+            return ret;
+    }
+
+    page_set_flags(start, start + len, 0);
+    return 0;
+}
+
+/* XXX: currently, we only handle MAP_ANONYMOUS and not MAP_FIXED
+   blocks which have been allocated starting on a host page */
+long target_mremap(unsigned long old_addr, unsigned long old_size, 
+                   unsigned long new_size, unsigned long flags,
+                   unsigned long new_addr)
+{
+    int prot;
+
+    /* XXX: use 5 args syscall */
+    new_addr = (long)mremap((void *)old_addr, old_size, new_size, flags);
+    if (new_addr == -1)
+        return new_addr;
+    prot = page_get_flags(old_addr);
+    page_set_flags(old_addr, old_addr + old_size, 0);
+    page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
+    return new_addr;
+}
+
+int target_msync(unsigned long start, unsigned long len, int flags)
+{
+    unsigned long end;
+
+    if (start & ~TARGET_PAGE_MASK)
+        return -EINVAL;
+    len = TARGET_PAGE_ALIGN(len);
+    if (len == 0)
+        return 0;
+    end = start + len;
+    
+    start &= host_page_mask;
+    return msync((void *)start, len, flags);
+}
+
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 0f004ff..7e23f27 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -88,4 +88,14 @@
 int do_vm86(CPUX86State *env, long subfunction, 
             struct target_vm86plus_struct * target_v86);
 
+/* mmap.c */
+int target_mprotect(unsigned long start, unsigned long len, int prot);
+long target_mmap(unsigned long start, unsigned long len, int prot, 
+                 int flags, int fd, unsigned long offset);
+int target_munmap(unsigned long start, unsigned long len);
+long target_mremap(unsigned long old_addr, unsigned long old_size, 
+                   unsigned long new_size, unsigned long flags,
+                   unsigned long new_addr);
+int target_msync(unsigned long start, unsigned long len, int flags);
+
 #endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 38e242b..ac7a111 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -64,11 +64,6 @@
 
 //#define DEBUG
 
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#define PAGE_MASK ~(PAGE_SIZE - 1)
-#endif
-
 //#include <linux/msdos_fs.h>
 #define	VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct dirent [2])
 #define	VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct dirent [2])
@@ -153,7 +148,7 @@
     if (new_brk < target_original_brk)
         return -ENOMEM;
     
-    brk_page = (char *)(((unsigned long)target_brk + PAGE_SIZE - 1) & PAGE_MASK);
+    brk_page = (char *)HOST_PAGE_ALIGN((unsigned long)target_brk);
 
     /* If the new brk is less than this, set it and we're done... */
     if (new_brk < brk_page) {
@@ -162,11 +157,10 @@
     }
 
     /* We need to allocate more memory after the brk... */
-    new_alloc_size = ((new_brk - brk_page + 1)+(PAGE_SIZE-1)) & PAGE_MASK;
-    mapped_addr = get_errno((long)mmap((caddr_t)brk_page, new_alloc_size, 
-                                       PROT_READ|PROT_WRITE,
-                                       MAP_ANON|MAP_FIXED|MAP_PRIVATE, 0, 0));
-    
+    new_alloc_size = HOST_PAGE_ALIGN(new_brk - brk_page + 1);
+    mapped_addr = get_errno(target_mmap((unsigned long)brk_page, new_alloc_size, 
+                                        PROT_READ|PROT_WRITE,
+                                        MAP_ANON|MAP_FIXED|MAP_PRIVATE, 0, 0));
     if (is_error(mapped_addr)) {
 	return mapped_addr;
     } else {
@@ -1709,7 +1703,7 @@
             v4 = tswap32(vptr[3]);
             v5 = tswap32(vptr[4]);
             v6 = tswap32(vptr[5]);
-            ret = get_errno((long)mmap((void *)v1, v2, v3, v4, v5, v6));
+            ret = get_errno(target_mmap(v1, v2, v3, v4, v5, v6));
         }
         break;
 #endif
@@ -1718,16 +1712,16 @@
 #else
     case TARGET_NR_mmap:
 #endif
-        ret = get_errno((long)mmap((void *)arg1, arg2, arg3, arg4, arg5, arg6));
+        ret = get_errno(target_mmap(arg1, arg2, arg3, arg4, arg5, arg6));
         break;
     case TARGET_NR_munmap:
-        ret = get_errno(munmap((void *)arg1, arg2));
+        ret = get_errno(target_munmap(arg1, arg2));
         break;
     case TARGET_NR_mprotect:
-        ret = get_errno(mprotect((void *)arg1, arg2, arg3));
+        ret = get_errno(target_mprotect(arg1, arg2, arg3));
         break;
     case TARGET_NR_mremap:
-        ret = get_errno((long)mremap((void *)arg1, arg2, arg3, arg4));
+        ret = get_errno(target_mremap(arg1, arg2, arg3, arg4, arg5));
         break;
     case TARGET_NR_msync:
         ret = get_errno(msync((void *)arg1, arg2, arg3));