target-ppc: memory load/store rework

Rework the memory load/store:
- Unify load/store functions for 32-bit and 64-bit CPU
- Don't swap values twice for bit-reverse load/store functions
  in little endian mode.
- On a 64-bit CPU in 32-bit mode, truncate the address once, when it
  is computed, instead of on every load/store. Also truncate the
  address when incrementing it (if needed).
- Cache writes to access_types.
- Add a few missing calls to gen_set_access_type()

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5949 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index fd635a3..a88e253 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -257,47 +257,51 @@
 /*****************************************************************************/
 /* Memory load and stores */
 
-static always_inline target_ulong get_addr(target_ulong addr)
+static always_inline target_ulong addr_add(target_ulong addr, target_long arg)
 {
 #if defined(TARGET_PPC64)
-        if (msr_sf)
-            return addr;
+        if (!msr_sf) /* TARGET_PPC64 only: msr_sf clear */
+            return (uint32_t)(addr + arg); /* keep the low 32 bits */
         else
 #endif
-            return (uint32_t)addr;
+            return addr + arg; /* otherwise the full-width sum */
 }
 
 void helper_lmw (target_ulong addr, uint32_t reg)
 {
-    for (; reg < 32; reg++, addr += 4) {
+    for (; reg < 32; reg++) { /* one word per GPR, from reg up to 31 */
         if (msr_le)
-            env->gpr[reg] = bswap32(ldl(get_addr(addr)));
+            env->gpr[reg] = bswap32(ldl(addr)); /* msr_le: swap bytes */
         else
-            env->gpr[reg] = ldl(get_addr(addr));
+            env->gpr[reg] = ldl(addr);
+	addr = addr_add(addr, 4); /* next word; truncated if !msr_sf */
     }
 }
 
 void helper_stmw (target_ulong addr, uint32_t reg)
 {
-    for (; reg < 32; reg++, addr += 4) {
+    for (; reg < 32; reg++) { /* one word per GPR, from reg up to 31 */
         if (msr_le)
-            stl(get_addr(addr), bswap32((uint32_t)env->gpr[reg]));
+            stl(addr, bswap32((uint32_t)env->gpr[reg])); /* msr_le: swap */
         else
-            stl(get_addr(addr), (uint32_t)env->gpr[reg]);
+            stl(addr, (uint32_t)env->gpr[reg]); /* low 32 bits of the GPR */
+	addr = addr_add(addr, 4); /* next word; truncated if !msr_sf */
     }
 }
 
 void helper_lsw(target_ulong addr, uint32_t nb, uint32_t reg)
 {
     int sh;
-    for (; nb > 3; nb -= 4, addr += 4) {
-        env->gpr[reg] = ldl(get_addr(addr));
+    for (; nb > 3; nb -= 4) { /* whole words while 4+ bytes remain */
+        env->gpr[reg] = ldl(addr);
         reg = (reg + 1) % 32;
+	addr = addr_add(addr, 4); /* next word; truncated if !msr_sf */
     }
     if (unlikely(nb > 0)) {
         env->gpr[reg] = 0;
-        for (sh = 24; nb > 0; nb--, addr++, sh -= 8) {
-            env->gpr[reg] |= ldub(get_addr(addr)) << sh;
+        for (sh = 24; nb > 0; nb--, sh -= 8) { /* 1-3 tail bytes, MSB first */
+            env->gpr[reg] |= ldub(addr) << sh;
+	    addr = addr_add(addr, 1); /* next byte; truncated if !msr_sf */
         }
     }
 }
@@ -323,25 +327,26 @@
 void helper_stsw(target_ulong addr, uint32_t nb, uint32_t reg)
 {
     int sh;
-    for (; nb > 3; nb -= 4, addr += 4) {
-        stl(get_addr(addr), env->gpr[reg]);
+    for (; nb > 3; nb -= 4) { /* whole words while 4+ bytes remain */
+        stl(addr, env->gpr[reg]);
         reg = (reg + 1) % 32;
+        addr = addr_add(addr, 4); /* next word; truncated if !msr_sf */
     }
     if (unlikely(nb > 0)) {
-        for (sh = 24; nb > 0; nb--, addr++, sh -= 8)
-            stb(get_addr(addr), (env->gpr[reg] >> sh) & 0xFF);
+        for (sh = 24; nb > 0; /* 1-3 tail bytes, MSB first; step addr per byte */
+             nb--, sh -= 8, addr = addr_add(addr, 1))
+            stb(addr, (env->gpr[reg] >> sh) & 0xFF);
     }
 }
 
 static void do_dcbz(target_ulong addr, int dcache_line_size)
 {
-    target_long mask = get_addr(~(dcache_line_size - 1));
+    addr &= ~(dcache_line_size - 1); /* align down to the line start */
     int i;
-    addr &= mask;
     for (i = 0 ; i < dcache_line_size ; i += 4) {
         stl(addr + i , 0);
     }
-    if ((env->reserve & mask) == addr)
+    if ((env->reserve & ~(dcache_line_size - 1)) == addr) /* same line */
         env->reserve = (target_ulong)-1ULL;
 }
 
@@ -362,7 +367,7 @@
 {
     uint32_t tmp;
 
-    addr = get_addr(addr & ~(env->dcache_line_size - 1));
+    addr &= ~(env->dcache_line_size - 1);
     /* Invalidate one cache line :
      * PowerPC specification says this is to be treated like a load
      * (not a fetch) by the MMU. To be sure it will be so,
@@ -378,7 +383,8 @@
     int i, c, d;
     d = 24;
     for (i = 0; i < xer_bc; i++) {
-        c = ldub((uint32_t)addr++);
+        c = ldub(addr);
+	addr = addr_add(addr, 1);
         /* ra (if not 0) and rb are never modified */
         if (likely(reg != rb && (ra == 0 || reg != ra))) {
             env->gpr[reg] = (env->gpr[reg] & ~(0xFF << d)) | (c << d);