target/i386/tcg: Use DPL-level accesses for interrupts and call gates

Stack accesses should be explicit and use the privilege level of the
target stack.  This ensures that SMAP is not applied when the target
stack is in ring 3.

This fixes a bug wherein i386/tcg assumed that an interrupt return, or a
far call using the CALL or JMP instruction, was always going from kernel
or user mode to kernel mode when using a call gate. This assumption is
violated if the call gate has a DPL that is greater than 0.

Analyzed-by: Robert R. Henry <rrh.henry@gmail.com>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/249
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 3b8fd82..02ae6a0 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -695,7 +695,6 @@
 
     sa.env = env;
     sa.ra = 0;
-    sa.mmu_index = cpu_mmu_index_kernel(env);
 
     if (type == 5) {
         /* task gate */
@@ -705,7 +704,9 @@
         }
         shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
         if (has_error_code) {
-            /* push the error code */
+            /* push the error code on the destination stack */
+            cpl = env->hflags & HF_CPL_MASK;
+            sa.mmu_index = x86_mmu_index_pl(env, cpl);
             if (env->segs[R_SS].flags & DESC_B_MASK) {
                 sa.sp_mask = 0xffffffff;
             } else {
@@ -750,6 +751,7 @@
     if (e2 & DESC_C_MASK) {
         dpl = cpl;
     }
+    sa.mmu_index = x86_mmu_index_pl(env, dpl);
     if (dpl < cpl) {
         /* to inner privilege */
         uint32_t esp;
@@ -1001,7 +1003,7 @@
 
     sa.env = env;
     sa.ra = 0;
-    sa.mmu_index = cpu_mmu_index_kernel(env);
+    sa.mmu_index = x86_mmu_index_pl(env, dpl);
     sa.sp_mask = -1;
     sa.ss_base = 0;
     if (dpl < cpl || ist != 0) {
@@ -1135,7 +1137,7 @@
     sa.sp = env->regs[R_ESP];
     sa.sp_mask = 0xffff;
     sa.ss_base = env->segs[R_SS].base;
-    sa.mmu_index = cpu_mmu_index_kernel(env);
+    sa.mmu_index = x86_mmu_index_pl(env, 0);
 
     if (is_int) {
         old_eip = next_eip;
@@ -1599,7 +1601,7 @@
     sa.sp = env->regs[R_ESP];
     sa.sp_mask = get_sp_mask(env->segs[R_SS].flags);
     sa.ss_base = env->segs[R_SS].base;
-    sa.mmu_index = cpu_mmu_index_kernel(env);
+    sa.mmu_index = x86_mmu_index_pl(env, 0);
 
     if (shift) {
         pushl(&sa, env->segs[R_CS].selector);
@@ -1639,9 +1641,9 @@
 
     sa.env = env;
     sa.ra = GETPC();
-    sa.mmu_index = cpu_mmu_index_kernel(env);
 
     if (e2 & DESC_S_MASK) {
+        /* "normal" far call, no stack switch possible */
         if (!(e2 & DESC_CS_MASK)) {
             raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
         }
@@ -1665,6 +1667,7 @@
             raise_exception_err_ra(env, EXCP0B_NOSEG, new_cs & 0xfffc, GETPC());
         }
 
+        sa.mmu_index = x86_mmu_index_pl(env, cpl);
 #ifdef TARGET_X86_64
         /* XXX: check 16/32 bit cases in long mode */
         if (shift == 2) {
@@ -1792,6 +1795,7 @@
 
         if (!(e2 & DESC_C_MASK) && dpl < cpl) {
             /* to inner privilege */
+            sa.mmu_index = x86_mmu_index_pl(env, dpl);
 #ifdef TARGET_X86_64
             if (shift == 2) {
                 ss = dpl;  /* SS = NULL selector with RPL = new CPL */
@@ -1870,6 +1874,7 @@
             new_stack = 1;
         } else {
             /* to same privilege */
+            sa.mmu_index = x86_mmu_index_pl(env, cpl);
             sa.sp = env->regs[R_ESP];
             sa.sp_mask = get_sp_mask(env->segs[R_SS].flags);
             sa.ss_base = env->segs[R_SS].base;