Hexagon (target/hexagon) Add overrides for dealloc-return instructions

These instructions perform a deallocframe+return (jumpr r31)

Add overrides for
    L4_return
    SL2_return
    L4_return_t
    L4_return_f
    L4_return_tnew_pt
    L4_return_fnew_pt
    L4_return_tnew_pnt
    L4_return_fnew_pnt
    SL2_return_t
    SL2_return_f
    SL2_return_tnew
    SL2_return_fnew

This patch eliminates the last helper that uses write_new_pc, so we
remove it from op_helper.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230307025828.1612809-5-tsimpson@quicinc.com>
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 5b7263a..e72672e 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -492,6 +492,59 @@
     fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
 
 /*
+ * dealloc_return
+ * Assembler mapped to
+ * r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return(SHORTCODE) \
+    gen_return(ctx, RddV, RsV)
+
+/*
+ * sub-instruction version (no RddV, so handle it manually)
+ */
+#define fGEN_TCG_SL2_return(SHORTCODE) \
+    do { \
+        TCGv_i64 RddV = tcg_temp_new_i64(); \
+        gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
+        gen_log_reg_write_pair(HEX_REG_FP, RddV); \
+    } while (0)
+
+/*
+ * Conditional returns follow this naming convention
+ *     _t                 predicate true
+ *     _f                 predicate false
+ *     _tnew_pt           predicate.new true predict taken
+ *     _fnew_pt           predicate.new false predict taken
+ *     _tnew_pnt          predicate.new true predict not taken
+ *     _fnew_pnt          predicate.new false predict not taken
+ * Predictions are not modelled in QEMU
+ *
+ * Example:
+ *     if (p1) r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return_t(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ);
+#define fGEN_TCG_L4_return_f(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+
+#define fGEN_TCG_SL2_return_t(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0])
+#define fGEN_TCG_SL2_return_f(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
+#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
+
+/*
  * Mathematical operations with more than one definition require
  * special handling
  */
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index f7017fd..3490310 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -715,6 +715,83 @@
     gen_set_label(skip);
 }
 
+/* frame ^= (int64_t)FRAMEKEY << 32 */
+static void gen_frame_unscramble(TCGv_i64 frame)
+{
+    TCGv_i64 framekey = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]);
+    tcg_gen_shli_i64(framekey, framekey, 32);
+    tcg_gen_xor_i64(frame, frame, framekey);
+}
+
+static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
+{
+    Insn *insn = ctx->insn;  /* Needed for CHECK_NOSHUF */
+    CHECK_NOSHUF(EA, 8);
+    tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
+}
+
+static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+                            TCGv r29)
+{
+    /*
+     * frame = *src
+     * dst = frame_unscramble(frame)
+     * SP = src + 8
+     * PC = dst.w[1]
+     */
+    TCGv_i64 frame = tcg_temp_new_i64();
+    TCGv r31 = tcg_temp_new();
+
+    gen_load_frame(ctx, frame, src);
+    gen_frame_unscramble(frame);
+    tcg_gen_mov_i64(dst, frame);
+    tcg_gen_addi_tl(r29, src, 8);
+    tcg_gen_extrh_i64_i32(r31, dst);
+    gen_jumpr(ctx, r31);
+}
+
+static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
+{
+    TCGv r29 = tcg_temp_new();
+    gen_return_base(ctx, dst, src, r29);
+    gen_log_reg_write(HEX_REG_SP, r29);
+}
+
+/* if (pred) dst = dealloc_return(src):raw */
+static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+                            TCGv pred, TCGCond cond)
+{
+    TCGv LSB = tcg_temp_new();
+    TCGv mask = tcg_temp_new();
+    TCGv r29 = tcg_temp_new();
+    TCGLabel *skip = gen_new_label();
+    tcg_gen_andi_tl(LSB, pred, 1);
+
+    /* Initialize the results in case the predicate is false */
+    tcg_gen_movi_i64(dst, 0);
+    tcg_gen_movi_tl(r29, 0);
+
+    /* Set the bit in hex_slot_cancelled if the predicate is flase */
+    tcg_gen_movi_tl(mask, 1 << ctx->insn->slot);
+    tcg_gen_or_tl(mask, hex_slot_cancelled, mask);
+    tcg_gen_movcond_tl(cond, hex_slot_cancelled, LSB, tcg_constant_tl(0),
+                       mask, hex_slot_cancelled);
+
+    tcg_gen_brcondi_tl(cond, LSB, 0, skip);
+    gen_return_base(ctx, dst, src, r29);
+    gen_set_label(skip);
+    gen_log_predicated_reg_write(HEX_REG_SP, r29, ctx->insn->slot);
+}
+
+/* sub-instruction version (no RddV, so handle it manually) */
+static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
+{
+    TCGv_i64 RddV = tcg_temp_new_i64();
+    gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
+    gen_log_predicated_reg_write_pair(HEX_REG_FP, RddV, ctx->insn->slot);
+}
+
 static void gen_endloop0(DisasContext *ctx)
 {
     TCGv lpcfg = tcg_temp_new();
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 35449ef..38b8aee 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -105,30 +105,6 @@
     env->mem_log_stores[slot].data64 = val;
 }
 
-void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
-                         target_ulong addr)
-{
-    HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
-
-    if (pkt_has_multi_cof) {
-        /*
-         * If more than one branch is taken in a packet, only the first one
-         * is actually done.
-         */
-        if (env->branch_taken) {
-            HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
-                          "ignoring the second one\n");
-        } else {
-            fCHECK_PCALIGN(addr);
-            env->gpr[HEX_REG_PC] = addr;
-            env->branch_taken = 1;
-        }
-    } else {
-        fCHECK_PCALIGN(addr);
-        env->gpr[HEX_REG_PC] = addr;
-    }
-}
-
 /* Handy place to set a breakpoint */
 void HELPER(debug_start_packet)(CPUHexagonState *env)
 {