target-s390: Convert DIVIDE

Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/target-s390x/helper.h b/target-s390x/helper.h
index a45b1c3..dcc3fce 100644
--- a/target-s390x/helper.h
+++ b/target-s390x/helper.h
@@ -10,7 +10,10 @@
 DEF_HELPER_4(clm, i32, env, i32, i32, i64)
 DEF_HELPER_4(stcm, void, env, i32, i32, i64)
 DEF_HELPER_FLAGS_3(mul128, TCG_CALL_NO_RWG, i64, env, i64, i64)
-DEF_HELPER_3(dlg, void, env, i32, i64)
+DEF_HELPER_3(divs32, s64, env, s64, s64)
+DEF_HELPER_3(divu32, i64, env, i64, i64)
+DEF_HELPER_3(divs64, s64, env, s64, s64)
+DEF_HELPER_4(divu64, i64, env, i64, i64, i64)
 DEF_HELPER_4(srst, i32, env, i32, i32, i32)
 DEF_HELPER_4(clst, i32, env, i32, i32, i32)
 DEF_HELPER_4(mvpg, void, env, i64, i64, i64)
diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def
index 8ea6630..921c216 100644
--- a/target-s390x/insn-data.def
+++ b/target-s390x/insn-data.def
@@ -131,6 +131,20 @@
     C(0xc607, CLHRL,   RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu32)
     C(0xc606, CLGHRL,  RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu64)
 
+/* DIVIDE */
+    C(0x1d00, DR,      RR_a,  Z,   r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
+    C(0x5d00, D,       RX_a,  Z,   r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
+/* DIVIDE LOGICAL */
+    C(0xb997, DLR,     RRE,   Z,   r1_D32, r2_32u, new_P, r1_P32, divu32, 0)
+    C(0xe397, DL,      RXY_a, Z,   r1_D32, m2_32u, new_P, r1_P32, divu32, 0)
+    C(0xb987, DLGR,    RRE,   Z,   0, r2_o, r1_P, 0, divu64, 0)
+    C(0xe387, DLG,     RXY_a, Z,   0, m2_64, r1_P, 0, divu64, 0)
+/* DIVIDE SINGLE */
+    C(0xb90d, DSGR,    RRE,   Z,   r1p1, r2, r1_P, 0, divs64, 0)
+    C(0xb91d, DSGFR,   RRE,   Z,   r1p1, r2_32s, r1_P, 0, divs64, 0)
+    C(0xe30d, DSG,     RXY_a, Z,   r1p1, m2_64, r1_P, 0, divs64, 0)
+    C(0xe31d, DSGF,    RXY_a, Z,   r1p1, m2_32s, r1_P, 0, divs64, 0)
+
 /* EXCLUSIVE OR */
     C(0x1700, XR,      RR_a,  Z,   r1, r2, new, r1_32, xor, nz32)
     C(0xb9f7, XRK,     RRF_a, DO,  r2, r3, new, r1_32, xor, nz32)
diff --git a/target-s390x/int_helper.c b/target-s390x/int_helper.c
index 17c4771..80e17f5 100644
--- a/target-s390x/int_helper.c
+++ b/target-s390x/int_helper.c
@@ -37,32 +37,51 @@
     return reth;
 }
 
-/* 128 -> 64/64 unsigned division */
-void HELPER(dlg)(CPUS390XState *env, uint32_t r1, uint64_t v2)
+/* 64/32 -> 32 signed division: returns quotient, remainder in env->retxl */
+int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b)
 {
+    env->retxl = a % (int32_t)b; /* ??? b == 0 is unchecked.  */
+    return a / (int32_t)b; /* ??? Quotient may exceed 32 bits.  */
+}
 
-    if (!env->regs[r1]) {
+/* 64/32 -> 32 unsigned division: returns quotient, remainder in env->retxl */
+uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b)
+{
+    env->retxl = a % (uint32_t)b; /* ??? b == 0 is unchecked.  */
+    return a / (uint32_t)b; /* ??? Quotient may exceed 32 bits.  */
+}
+
+/* 64/64 -> 64 signed division: returns quotient, remainder in env->retxl */
+int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b)
+{
+    env->retxl = a % b; /* ??? b == 0 and INT64_MIN / -1 are unchecked.  */
+    return a / b;
+}
+
+/* 128 -> 64/64 unsigned division */
+uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al,
+                        uint64_t b)
+{
+    uint64_t ret; /* quotient; the remainder goes to env->retxl */
+    if (ah == 0) {
         /* 64 -> 64/64 case */
-        env->regs[r1] = env->regs[r1 + 1] % divisor;
-        env->regs[r1 + 1] = env->regs[r1 + 1] / divisor;
-        return;
+        env->retxl = al % b; /* ??? b == 0 is unchecked.  */
+        ret = al / b;
     } else {
+        /* ??? Move i386 idivq helper to host-utils.  */
 #if HOST_LONG_BITS == 64 && defined(__GNUC__)
         /* assuming 64-bit hosts have __uint128_t */
-        __uint128_t dividend = (((__uint128_t)env->regs[r1]) << 64) |
-            (env->regs[r1 + 1]);
-        __uint128_t quotient = dividend / divisor;
-        __uint128_t remainder = dividend % divisor;
-
-        env->regs[r1 + 1] = quotient;
-        env->regs[r1] = remainder;
+        __uint128_t a = ((__uint128_t)ah << 64) | al;
+        __uint128_t q = a / b;
+        env->retxl = a % b;
+        ret = q; /* ??? Silently truncated when ah >= b.  */
 #else
         /* 32-bit hosts would need special wrapper functionality - just abort if
            we encounter such a case; it's very unlikely anyways. */
         cpu_abort(env, "128 -> 64/64 division not implemented\n");
 #endif
     }
+    return ret;
 }
 
 /* absolute value 32-bit */
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index fdf0129..5667155 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -1232,31 +1232,13 @@
 static void disas_e3(CPUS390XState *env, DisasContext* s, int op, int r1,
                      int x2, int b2, int d2)
 {
-    TCGv_i64 addr, tmp, tmp2, tmp3, tmp4;
+    TCGv_i64 addr, tmp2, tmp3;
     TCGv_i32 tmp32_1;
 
     LOG_DISAS("disas_e3: op 0x%x r1 %d x2 %d b2 %d d2 %d\n",
               op, r1, x2, b2, d2);
     addr = get_address(s, x2, b2, d2);
     switch (op) {
-    case 0xd: /* DSG      R1,D2(X2,B2)     [RXY] */
-    case 0x1d: /* DSGF      R1,D2(X2,B2)     [RXY] */
-        tmp2 = tcg_temp_new_i64();
-        if (op == 0x1d) {
-            tcg_gen_qemu_ld32s(tmp2, addr, get_mem_index(s));
-        } else {
-            tcg_gen_qemu_ld64(tmp2, addr, get_mem_index(s));
-        }
-        tmp4 = load_reg(r1 + 1);
-        tmp3 = tcg_temp_new_i64();
-        tcg_gen_div_i64(tmp3, tmp4, tmp2);
-        store_reg(r1 + 1, tmp3);
-        tcg_gen_rem_i64(tmp3, tmp4, tmp2);
-        store_reg(r1, tmp3);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        tcg_temp_free_i64(tmp4);
-        break;
     case 0xf: /* LRVG     R1,D2(X2,B2)     [RXE] */
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld64(tmp2, addr, get_mem_index(s));
@@ -1306,34 +1288,6 @@
         store_reg8(r1, tmp3);
         tcg_temp_free_i64(tmp3);
         break;
-    case 0x87: /* DLG      R1,D2(X2,B2)     [RXY] */
-        tmp2 = tcg_temp_new_i64();
-        tmp32_1 = tcg_const_i32(r1);
-        tcg_gen_qemu_ld64(tmp2, addr, get_mem_index(s));
-        gen_helper_dlg(cpu_env, tmp32_1, tmp2);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i32(tmp32_1);
-        break;
-    case 0x97: /* DL     R1,D2(X2,B2)     [RXY] */
-        /* reg(r1) = reg(r1, r1+1) % ld32(addr) */
-        /* reg(r1+1) = reg(r1, r1+1) / ld32(addr) */
-        tmp = load_reg(r1);
-        tmp2 = tcg_temp_new_i64();
-        tcg_gen_qemu_ld32u(tmp2, addr, get_mem_index(s));
-        tmp3 = load_reg((r1 + 1) & 15);
-        tcg_gen_ext32u_i64(tmp2, tmp2);
-        tcg_gen_ext32u_i64(tmp3, tmp3);
-        tcg_gen_shli_i64(tmp, tmp, 32);
-        tcg_gen_or_i64(tmp, tmp, tmp3);
-
-        tcg_gen_rem_i64(tmp3, tmp, tmp2);
-        tcg_gen_div_i64(tmp, tmp, tmp2);
-        store_reg32_i64((r1 + 1) & 15, tmp);
-        store_reg32_i64(r1, tmp3);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        break;
     default:
         LOG_DISAS("illegal e3 operation 0x%x\n", op);
         gen_illegal_opcode(s);
@@ -2414,31 +2368,11 @@
 static void disas_b9(CPUS390XState *env, DisasContext *s, int op, int r1,
                      int r2)
 {
-    TCGv_i64 tmp, tmp2, tmp3;
+    TCGv_i64 tmp;
     TCGv_i32 tmp32_1;
 
     LOG_DISAS("disas_b9: op 0x%x r1 %d r2 %d\n", op, r1, r2);
     switch (op) {
-    case 0xd: /* DSGR      R1,R2     [RRE] */
-    case 0x1d: /* DSGFR      R1,R2     [RRE] */
-        tmp = load_reg(r1 + 1);
-        if (op == 0xd) {
-            tmp2 = load_reg(r2);
-        } else {
-            tmp32_1 = load_reg32(r2);
-            tmp2 = tcg_temp_new_i64();
-            tcg_gen_ext_i32_i64(tmp2, tmp32_1);
-            tcg_temp_free_i32(tmp32_1);
-        }
-        tmp3 = tcg_temp_new_i64();
-        tcg_gen_div_i64(tmp3, tmp, tmp2);
-        store_reg(r1 + 1, tmp3);
-        tcg_gen_rem_i64(tmp3, tmp, tmp2);
-        store_reg(r1, tmp3);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        break;
     case 0x17: /* LLGTR      R1,R2     [RRE] */
         tmp32_1 = load_reg32(r2);
         tmp = tcg_temp_new_i64();
@@ -2465,32 +2399,6 @@
         tcg_temp_free_i64(tmp);
         tcg_temp_free_i32(tmp32_1);
         break;
-    case 0x87: /* DLGR      R1,R2     [RRE] */
-        tmp32_1 = tcg_const_i32(r1);
-        tmp = load_reg(r2);
-        gen_helper_dlg(cpu_env, tmp32_1, tmp);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i32(tmp32_1);
-        break;
-    case 0x97: /* DLR     R1,R2     [RRE] */
-        /* reg(r1) = reg(r1, r1+1) % reg(r2) */
-        /* reg(r1+1) = reg(r1, r1+1) / reg(r2) */
-        tmp = load_reg(r1);
-        tmp2 = load_reg(r2);
-        tmp3 = load_reg((r1 + 1) & 15);
-        tcg_gen_ext32u_i64(tmp2, tmp2);
-        tcg_gen_ext32u_i64(tmp3, tmp3);
-        tcg_gen_shli_i64(tmp, tmp, 32);
-        tcg_gen_or_i64(tmp, tmp, tmp3);
-
-        tcg_gen_rem_i64(tmp3, tmp, tmp2);
-        tcg_gen_div_i64(tmp, tmp, tmp2);
-        store_reg32_i64((r1 + 1) & 15, tmp);
-        store_reg32_i64(r1, tmp3);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        break;
     default:
         LOG_DISAS("illegal b9 operation 0x%x\n", op);
         gen_illegal_opcode(s);
@@ -2543,41 +2451,6 @@
         tcg_temp_free_i32(tmp32_1);
         tcg_temp_free_i32(tmp32_2);
         break;
-    case 0x1d: /* DR     R1,R2               [RR] */
-        insn = ld_code2(env, s->pc);
-        decode_rr(s, insn, &r1, &r2);
-        tmp32_1 = load_reg32(r1);
-        tmp32_2 = load_reg32(r1 + 1);
-        tmp32_3 = load_reg32(r2);
-
-        tmp = tcg_temp_new_i64(); /* dividend */
-        tmp2 = tcg_temp_new_i64(); /* divisor */
-        tmp3 = tcg_temp_new_i64();
-
-        /* dividend is r(r1 << 32) | r(r1 + 1) */
-        tcg_gen_extu_i32_i64(tmp, tmp32_1);
-        tcg_gen_extu_i32_i64(tmp2, tmp32_2);
-        tcg_gen_shli_i64(tmp, tmp, 32);
-        tcg_gen_or_i64(tmp, tmp, tmp2);
-
-        /* divisor is r(r2) */
-        tcg_gen_ext_i32_i64(tmp2, tmp32_3);
-
-        tcg_gen_div_i64(tmp3, tmp, tmp2);
-        tcg_gen_rem_i64(tmp, tmp, tmp2);
-
-        tcg_gen_trunc_i64_i32(tmp32_1, tmp);
-        tcg_gen_trunc_i64_i32(tmp32_2, tmp3);
-
-        store_reg32(r1, tmp32_1); /* remainder */
-        store_reg32(r1 + 1, tmp32_2); /* quotient */
-        tcg_temp_free_i32(tmp32_1);
-        tcg_temp_free_i32(tmp32_2);
-        tcg_temp_free_i32(tmp32_3);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        break;
     case 0x28: /* LDR    R1,R2               [RR] */
         insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
@@ -2626,40 +2499,6 @@
         tcg_temp_free_i64(tmp2);
         tcg_temp_free_i32(tmp32_1);
         break;
-    case 0x5d: /* D      R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(env, s->pc);
-        tmp3 = decode_rx(s, insn, &r1, &x2, &b2, &d2);
-        tmp32_1 = load_reg32(r1);
-        tmp32_2 = load_reg32(r1 + 1);
-
-        tmp = tcg_temp_new_i64();
-        tmp2 = tcg_temp_new_i64();
-
-        /* dividend is r(r1 << 32) | r(r1 + 1) */
-        tcg_gen_extu_i32_i64(tmp, tmp32_1);
-        tcg_gen_extu_i32_i64(tmp2, tmp32_2);
-        tcg_gen_shli_i64(tmp, tmp, 32);
-        tcg_gen_or_i64(tmp, tmp, tmp2);
-
-        /* divisor is in memory */
-        tcg_gen_qemu_ld32s(tmp2, tmp3, get_mem_index(s));
-
-        /* XXX divisor == 0 -> FixP divide exception */
-
-        tcg_gen_div_i64(tmp3, tmp, tmp2);
-        tcg_gen_rem_i64(tmp, tmp, tmp2);
-
-        tcg_gen_trunc_i64_i32(tmp32_1, tmp);
-        tcg_gen_trunc_i64_i32(tmp32_2, tmp3);
-
-        store_reg32(r1, tmp32_1); /* remainder */
-        store_reg32(r1 + 1, tmp32_2); /* quotient */
-        tcg_temp_free_i32(tmp32_1);
-        tcg_temp_free_i32(tmp32_2);
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        break;
     case 0x60: /* STD    R1,D2(X2,B2)        [RX] */
         insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
@@ -3766,6 +3605,34 @@
     return help_branch(s, &c, is_imm, imm, o->in2);
 }
 
+static ExitStatus op_divs32(DisasContext *s, DisasOps *o)
+{
+    gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2); /* quotient */
+    return_low128(o->out); /* remainder; presumably read from retxl */
+    return NO_EXIT;
+}
+
+static ExitStatus op_divu32(DisasContext *s, DisasOps *o)
+{
+    gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2); /* quotient */
+    return_low128(o->out); /* remainder; presumably read from retxl */
+    return NO_EXIT;
+}
+
+static ExitStatus op_divs64(DisasContext *s, DisasOps *o)
+{
+    gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2); /* quotient */
+    return_low128(o->out); /* remainder; presumably read from retxl */
+    return NO_EXIT;
+}
+
+static ExitStatus op_divu64(DisasContext *s, DisasOps *o)
+{
+    gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2);
+    return_low128(o->out); /* dividend out:out2 -> presumably rem:quot */
+    return NO_EXIT;
+}
+
 static ExitStatus op_insi(DisasContext *s, DisasOps *o)
 {
     int shift = s->insn->data & 0xff;
@@ -4089,6 +3956,12 @@
     o->out = tcg_temp_new_i64();
 }
 
+static void prep_new_P(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    o->out = tcg_temp_new_i64(); /* fresh temps, not regs[] aliases */
+    o->out2 = tcg_temp_new_i64();
+}
+
 static void prep_r1(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     o->out = regs[get_field(f, r1)];
@@ -4120,6 +3993,14 @@
     store_reg32_i64(get_field(f, r1), o->out);
 }
 
+static void wout_r1_P32(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    /* ??? Specification exception: r1 must be even.  */
+    int r1 = get_field(f, r1);
+    store_reg32_i64(r1, o->out); /* out -> r1 */
+    store_reg32_i64((r1 + 1) & 15, o->out2); /* out2 -> r1 + 1 (mod 16) */
+}
+
 static void wout_r1_D32(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     /* ??? Specification exception: r1 must be even.  */
@@ -4183,6 +4064,14 @@
     tcg_gen_ext32u_i64(o->in1, regs[(r1 + 1) & 15]);
 }
 
+static void in1_r1_D32(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    /* ??? Specification exception: r1 must be even.  */
+    int r1 = get_field(f, r1); /* odd r1 == 15 must not index regs[16] */
+    o->in1 = tcg_temp_new_i64(); /* 64-bit value: r1 high, r1 + 1 low */
+    tcg_gen_concat32_i64(o->in1, regs[(r1 + 1) & 15], regs[r1]);
+}
+
 static void in1_r2(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     o->in1 = load_reg(get_field(f, r2));