accel/tcg: Move CPUNegativeOffsetState into CPUState

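Move the "neg" member out of each target's ArchCPU and into CPUState,
where it is placed last.  Retain the separate structure to emphasize
its importance.  Enforce at build time that CPUArchState always
follows CPUState without padding.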

Reviewed-by: Anton Johansson <anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index b2d4e22..098d99b 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -344,8 +344,8 @@
     tcg_ctx->page_bits = TARGET_PAGE_BITS;
     tcg_ctx->page_mask = TARGET_PAGE_MASK;
     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
-    tcg_ctx->tlb_fast_offset =
-        (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
+    tcg_ctx->tlb_fast_offset = (int)offsetof(ArchCPU, parent_obj.neg.tlb.f)
+                             - (int)offsetof(ArchCPU, env);
 #endif
     tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
 #ifdef TCG_GUEST_DEFAULT_MO
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 358214d..b3e12d6 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -48,8 +48,8 @@
     if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) {
         count = tcg_temp_new_i32();
         tcg_gen_ld_i32(count, cpu_env,
-                       offsetof(ArchCPU, neg.icount_decr.u32) -
-                       offsetof(ArchCPU, env));
+                       offsetof(ArchCPU, parent_obj.neg.icount_decr.u32)
+                       - offsetof(ArchCPU, env));
     }
 
     if (cflags & CF_USE_ICOUNT) {
@@ -78,8 +78,8 @@
 
     if (cflags & CF_USE_ICOUNT) {
         tcg_gen_st16_i32(count, cpu_env,
-                         offsetof(ArchCPU, neg.icount_decr.u16.low) -
-                         offsetof(ArchCPU, env));
+                         offsetof(ArchCPU, parent_obj.neg.icount_decr.u16.low)
+                         - offsetof(ArchCPU, env));
     }
 
     /*
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index ed7747a..0dd32cb 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -432,9 +432,13 @@
 static inline void cpu_set_cpustate_pointers(ArchCPU *cpu)
 {
     cpu->parent_obj.env_ptr = &cpu->env;
-    cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr;
+    cpu->parent_obj.icount_decr_ptr = &cpu->parent_obj.neg.icount_decr;
 }
 
+/* Validate correct placement of CPUArchState. */
+QEMU_BUILD_BUG_ON(offsetof(ArchCPU, parent_obj) != 0);
+QEMU_BUILD_BUG_ON(offsetof(ArchCPU, env) != sizeof(CPUState));
+
 /**
  * env_archcpu(env)
  * @env: The architecture environment
@@ -443,7 +447,7 @@
  */
 static inline ArchCPU *env_archcpu(CPUArchState *env)
 {
-    return container_of(env, ArchCPU, env);
+    return (void *)env - sizeof(CPUState);
 }
 
 /**
@@ -454,15 +458,9 @@
  */
 static inline CPUState *env_cpu(CPUArchState *env)
 {
-    return &env_archcpu(env)->parent_obj;
+    return (void *)env - sizeof(CPUState);
 }
 
-/*
- * Validate placement of CPUNegativeOffsetState.
- */
-QEMU_BUILD_BUG_ON(offsetof(ArchCPU, env) - offsetof(ArchCPU, neg) >=
-                  sizeof(CPUNegativeOffsetState) + __alignof(CPUArchState));
-
 /**
  * env_neg(env)
  * @env: The architecture environment
@@ -471,8 +469,7 @@
  */
 static inline CPUNegativeOffsetState *env_neg(CPUArchState *env)
 {
-    ArchCPU *arch_cpu = container_of(env, ArchCPU, env);
-    return &arch_cpu->neg;
+    return &env_cpu(env)->neg;
 }
 
 /**
@@ -483,8 +480,7 @@
  */
 static inline CPUNegativeOffsetState *cpu_neg(CPUState *cpu)
 {
-    ArchCPU *arch_cpu = container_of(cpu, ArchCPU, parent_obj);
-    return &arch_cpu->neg;
+    return &cpu->neg;
 }
 
 /**
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 04baa50..115ddf6 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -345,8 +345,8 @@
 } IcountDecr;
 
 /*
- * This structure must be placed in ArchCPU immediately
- * before CPUArchState, as a field named "neg".
+ * Elements of CPUState most efficiently accessed from CPUArchState,
+ * via small negative offsets.
  */
 typedef struct CPUNegativeOffsetState {
     CPUTLB tlb;
@@ -453,6 +453,9 @@
  *    dirty ring structure.
  *
  * State of one CPU core or thread.
+ *
+ * CPUState is aligned (see neg_align) to match the alignment that may be
+ * required by CPUArchState, so ArchCPU has no hole between the two structures.
  */
 struct CPUState {
     /*< private >*/
@@ -571,8 +574,18 @@
 
     /* track IOMMUs whose translations we've cached in the TCG TLB */
     GArray *iommu_notifiers;
+
+    /*
+     * MUST BE LAST in order to minimize the displacement to CPUArchState.
+     */
+    char neg_align[-sizeof(CPUNegativeOffsetState) % 16] QEMU_ALIGNED(16);
+    CPUNegativeOffsetState neg;
 };
 
+/* Validate placement of CPUNegativeOffsetState. */
+QEMU_BUILD_BUG_ON(offsetof(CPUState, neg) !=
+                  sizeof(CPUState) - sizeof(CPUNegativeOffsetState));
+
 typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ;
 extern CPUTailQ cpus;
 
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index 1330666..e2a467e 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -263,7 +263,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUAlphaState env;
 
     /* This alarm doesn't exist in real hardware; we wish it did.  */
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index bd55c5d..a9edfb8 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -856,7 +856,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUARMState env;
 
     /* Coprocessor information */
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index 7225174..4ce22d8 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -148,7 +148,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUAVRState env;
 };
 
diff --git a/target/cris/cpu.h b/target/cris/cpu.h
index 8e37c6e..676b8e9 100644
--- a/target/cris/cpu.h
+++ b/target/cris/cpu.h
@@ -178,7 +178,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUCRISState env;
 };
 
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index daef5c3..10cd1ef 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -141,7 +141,7 @@
     /*< private >*/
     CPUState parent_obj;
     /*< public >*/
-    CPUNegativeOffsetState neg;
+
     CPUHexagonState env;
 
     bool lldb_compat;
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 730f352..798d0c2 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -237,7 +237,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUHPPAState env;
     QEMUTimer *alarm_timer;
 };
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index d3f377d..e187546 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1901,7 +1901,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUX86State env;
     VMChangeStateEntry *vmsentry;
 
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index f125a8e..40e70a8 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -375,7 +375,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPULoongArchState env;
     QEMUTimer timer;
     uint32_t  phy_id;
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index cf70282..20afb0c 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -168,7 +168,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUM68KState env;
 };
 
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index f6cab6c..e43c49d 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -345,15 +345,15 @@
 struct ArchCPU {
     /*< private >*/
     CPUState parent_obj;
-
     /*< public >*/
+
+    CPUMBState env;
+
     bool ns_axi_dp;
     bool ns_axi_ip;
     bool ns_axi_dc;
     bool ns_axi_ic;
 
-    CPUNegativeOffsetState neg;
-    CPUMBState env;
     MicroBlazeCPUConfig cfg;
 };
 
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 6d6af1f..67f8e8b 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1213,10 +1213,10 @@
     CPUState parent_obj;
     /*< public >*/
 
+    CPUMIPSState env;
+
     Clock *clock;
     Clock *count_div; /* Divider for CP0_Count clock */
-    CPUNegativeOffsetState neg;
-    CPUMIPSState env;
 };
 
 
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index 477a316..70b6377 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -218,7 +218,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUNios2State env;
 
     bool diverr_present;
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index ce4d605..334997e 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -305,7 +305,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUOpenRISCState env;
 };
 
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index d703a5f..30392eb 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1317,7 +1317,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUPPCState env;
 
     int vcpu_id;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6316cbc..ef9cf21 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -388,7 +388,7 @@
     /* < private > */
     CPUState parent_obj;
     /* < public > */
-    CPUNegativeOffsetState neg;
+
     CPURISCVState env;
 
     char *dyn_csr_xml;
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index 7f03ffc..f66754e 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -111,7 +111,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPURXState env;
 };
 
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 304029e..7bea707 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -170,7 +170,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUS390XState env;
     S390CPUModel *model;
     /* needed for live migration */
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 1399d38..f75a235 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -208,7 +208,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUSH4State env;
 };
 
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 9804457..b3a98f1 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -561,7 +561,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUSPARCState env;
 };
 
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index 1cace96..a357b57 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -67,7 +67,6 @@
     CPUState parent_obj;
     /*< public >*/
 
-    CPUNegativeOffsetState neg;
     CPUTriCoreState env;
 };
 
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 87fe992..c6bbef1 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -560,9 +560,8 @@
     CPUState parent_obj;
     /*< public >*/
 
-    Clock *clock;
-    CPUNegativeOffsetState neg;
     CPUXtensaState env;
+    Clock *clock;
 };