Hexagon (target/hexagon) Short-circuit packet predicate writes

In certain cases, we can avoid the overhead of writing to hex_new_pred_value
and write directly to hex_pred.  We consider predicate reads/writes when
computing ctx->need_commit.  The get_result_pred() function uses this
field to decide between hex_new_pred_value and hex_pred.  Then, we can
early-exit from gen_pred_writes.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230427230012.3800327-13-tsimpson@quicinc.com>
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 9858d7b..5025e17 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -110,8 +110,18 @@
     gen_log_reg_write(ctx, rnum + 1, val32);
 }
 
+TCGv get_result_pred(DisasContext *ctx, int pnum)
+{
+    if (ctx->need_commit) {
+        return hex_new_pred_value[pnum];
+    } else {
+        return hex_pred[pnum];
+    }
+}
+
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
 {
+    TCGv pred = get_result_pred(ctx, pnum);
     TCGv base_val = tcg_temp_new();
 
     tcg_gen_andi_tl(base_val, val, 0xff);
@@ -124,10 +134,9 @@
      * straight assignment.  Otherwise, do an and.
      */
     if (!test_bit(pnum, ctx->pregs_written)) {
-        tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val);
+        tcg_gen_mov_tl(pred, base_val);
     } else {
-        tcg_gen_and_tl(hex_new_pred_value[pnum],
-                       hex_new_pred_value[pnum], base_val);
+        tcg_gen_and_tl(pred, pred, base_val);
     }
     if (HEX_DEBUG) {
         tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h
index 420867f..e11ccc2 100644
--- a/target/hexagon/genptr.h
+++ b/target/hexagon/genptr.h
@@ -35,6 +35,7 @@
 void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot);
 TCGv gen_read_reg(TCGv result, int num);
 TCGv gen_read_preg(TCGv pred, uint8_t num);
+TCGv get_result_pred(DisasContext *ctx, int pnum);
 void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val);
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val);
 void gen_set_usr_field(DisasContext *ctx, int field, TCGv val);
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 6fa885c..bcf64f7 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -386,6 +386,14 @@
         }
     }
 
+    /* Check for overlap between predicate reads and writes */
+    for (int i = 0; i < ctx->preg_log_idx; i++) {
+        int pnum = ctx->preg_log[i];
+        if (test_bit(pnum, ctx->pregs_read)) {
+            return true;
+        }
+    }
+
     return false;
 }
 
@@ -503,7 +511,7 @@
      * Preload the predicated pred registers into hex_new_pred_value[pred_num]
      * Only endloop instructions conditionally write to pred registers
      */
-    if (pkt->pkt_has_endloop) {
+    if (ctx->need_commit && pkt->pkt_has_endloop) {
         for (int i = 0; i < ctx->preg_log_idx; i++) {
             int pred_num = ctx->preg_log[i];
             tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]);
@@ -622,8 +630,8 @@
 
 static void gen_pred_writes(DisasContext *ctx)
 {
-    /* Early exit if the log is empty */
-    if (!ctx->preg_log_idx) {
+    /* Early exit if not needed or the log is empty */
+    if (!ctx->need_commit || !ctx->preg_log_idx) {
         return;
     }