Nicholas Piggin | 6b8a053 | 2023-06-20 20:57:37 +1000 | [diff] [blame] | 1 | #include "qemu/osdep.h" |
| 2 | #include "qemu/cutils.h" |
| 3 | #include "exec/exec-all.h" |
| 4 | #include "helper_regs.h" |
| 5 | #include "hw/ppc/ppc.h" |
| 6 | #include "hw/ppc/spapr.h" |
| 7 | #include "hw/ppc/spapr_cpu_core.h" |
| 8 | #include "hw/ppc/spapr_nested.h" |
| 9 | |
| 10 | #ifdef CONFIG_TCG |
| 11 | #define PRTS_MASK 0x1f |
| 12 | |
| 13 | static target_ulong h_set_ptbl(PowerPCCPU *cpu, |
| 14 | SpaprMachineState *spapr, |
| 15 | target_ulong opcode, |
| 16 | target_ulong *args) |
| 17 | { |
| 18 | target_ulong ptcr = args[0]; |
| 19 | |
| 20 | if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { |
| 21 | return H_FUNCTION; |
| 22 | } |
| 23 | |
| 24 | if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { |
| 25 | return H_PARAMETER; |
| 26 | } |
| 27 | |
| 28 | spapr->nested_ptcr = ptcr; /* Save new partition table */ |
| 29 | |
| 30 | return H_SUCCESS; |
| 31 | } |
| 32 | |
| 33 | static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, |
| 34 | SpaprMachineState *spapr, |
| 35 | target_ulong opcode, |
| 36 | target_ulong *args) |
| 37 | { |
| 38 | /* |
| 39 | * The spapr virtual hypervisor nested HV implementation retains no L2 |
| 40 | * translation state except for TLB. And the TLB is always invalidated |
| 41 | * across L1<->L2 transitions, so nothing is required here. |
| 42 | */ |
| 43 | |
| 44 | return H_SUCCESS; |
| 45 | } |
| 46 | |
| 47 | static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, |
| 48 | SpaprMachineState *spapr, |
| 49 | target_ulong opcode, |
| 50 | target_ulong *args) |
| 51 | { |
| 52 | /* |
| 53 | * This HCALL is not required, L1 KVM will take a slow path and walk the |
| 54 | * page tables manually to do the data copy. |
| 55 | */ |
| 56 | return H_FUNCTION; |
| 57 | } |
| 58 | |
| 59 | static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) |
| 60 | { |
| 61 | CPUPPCState *env = &cpu->env; |
| 62 | |
| 63 | memcpy(save->gpr, env->gpr, sizeof(save->gpr)); |
| 64 | |
| 65 | save->lr = env->lr; |
| 66 | save->ctr = env->ctr; |
| 67 | save->cfar = env->cfar; |
| 68 | save->msr = env->msr; |
| 69 | save->nip = env->nip; |
| 70 | |
| 71 | save->cr = ppc_get_cr(env); |
| 72 | save->xer = cpu_read_xer(env); |
| 73 | |
| 74 | save->lpcr = env->spr[SPR_LPCR]; |
| 75 | save->lpidr = env->spr[SPR_LPIDR]; |
| 76 | save->pcr = env->spr[SPR_PCR]; |
| 77 | save->dpdes = env->spr[SPR_DPDES]; |
| 78 | save->hfscr = env->spr[SPR_HFSCR]; |
| 79 | save->srr0 = env->spr[SPR_SRR0]; |
| 80 | save->srr1 = env->spr[SPR_SRR1]; |
| 81 | save->sprg0 = env->spr[SPR_SPRG0]; |
| 82 | save->sprg1 = env->spr[SPR_SPRG1]; |
| 83 | save->sprg2 = env->spr[SPR_SPRG2]; |
| 84 | save->sprg3 = env->spr[SPR_SPRG3]; |
| 85 | save->pidr = env->spr[SPR_BOOKS_PID]; |
| 86 | save->ppr = env->spr[SPR_PPR]; |
| 87 | |
| 88 | save->tb_offset = env->tb_env->tb_offset; |
| 89 | } |
| 90 | |
| 91 | static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) |
| 92 | { |
| 93 | CPUState *cs = CPU(cpu); |
| 94 | CPUPPCState *env = &cpu->env; |
| 95 | |
| 96 | memcpy(env->gpr, load->gpr, sizeof(env->gpr)); |
| 97 | |
| 98 | env->lr = load->lr; |
| 99 | env->ctr = load->ctr; |
| 100 | env->cfar = load->cfar; |
| 101 | env->msr = load->msr; |
| 102 | env->nip = load->nip; |
| 103 | |
| 104 | ppc_set_cr(env, load->cr); |
| 105 | cpu_write_xer(env, load->xer); |
| 106 | |
| 107 | env->spr[SPR_LPCR] = load->lpcr; |
| 108 | env->spr[SPR_LPIDR] = load->lpidr; |
| 109 | env->spr[SPR_PCR] = load->pcr; |
| 110 | env->spr[SPR_DPDES] = load->dpdes; |
| 111 | env->spr[SPR_HFSCR] = load->hfscr; |
| 112 | env->spr[SPR_SRR0] = load->srr0; |
| 113 | env->spr[SPR_SRR1] = load->srr1; |
| 114 | env->spr[SPR_SPRG0] = load->sprg0; |
| 115 | env->spr[SPR_SPRG1] = load->sprg1; |
| 116 | env->spr[SPR_SPRG2] = load->sprg2; |
| 117 | env->spr[SPR_SPRG3] = load->sprg3; |
| 118 | env->spr[SPR_BOOKS_PID] = load->pidr; |
| 119 | env->spr[SPR_PPR] = load->ppr; |
| 120 | |
| 121 | env->tb_env->tb_offset = load->tb_offset; |
| 122 | |
| 123 | /* |
| 124 | * MSR updated, compute hflags and possible interrupts. |
| 125 | */ |
| 126 | hreg_compute_hflags(env); |
| 127 | ppc_maybe_interrupt(env); |
| 128 | |
| 129 | /* |
| 130 | * Nested HV does not tag TLB entries between L1 and L2, so must |
| 131 | * flush on transition. |
| 132 | */ |
| 133 | tlb_flush(cs); |
| 134 | env->reserve_addr = -1; /* Reset the reservation */ |
| 135 | } |
| 136 | |
| 137 | /* |
| 138 | * When this handler returns, the environment is switched to the L2 guest |
| 139 | * and TCG begins running that. spapr_exit_nested() performs the switch from |
| 140 | * L2 back to L1 and returns from the H_ENTER_NESTED hcall. |
| 141 | */ |
| 142 | static target_ulong h_enter_nested(PowerPCCPU *cpu, |
| 143 | SpaprMachineState *spapr, |
| 144 | target_ulong opcode, |
| 145 | target_ulong *args) |
| 146 | { |
| 147 | PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); |
| 148 | CPUPPCState *env = &cpu->env; |
| 149 | SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); |
| 150 | struct nested_ppc_state l2_state; |
| 151 | target_ulong hv_ptr = args[0]; |
| 152 | target_ulong regs_ptr = args[1]; |
| 153 | target_ulong hdec, now = cpu_ppc_load_tbl(env); |
| 154 | target_ulong lpcr, lpcr_mask; |
| 155 | struct kvmppc_hv_guest_state *hvstate; |
| 156 | struct kvmppc_hv_guest_state hv_state; |
| 157 | struct kvmppc_pt_regs *regs; |
| 158 | hwaddr len; |
| 159 | |
| 160 | if (spapr->nested_ptcr == 0) { |
| 161 | return H_NOT_AVAILABLE; |
| 162 | } |
| 163 | |
| 164 | len = sizeof(*hvstate); |
| 165 | hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, |
| 166 | MEMTXATTRS_UNSPECIFIED); |
| 167 | if (len != sizeof(*hvstate)) { |
| 168 | address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); |
| 169 | return H_PARAMETER; |
| 170 | } |
| 171 | |
| 172 | memcpy(&hv_state, hvstate, len); |
| 173 | |
| 174 | address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); |
| 175 | |
| 176 | /* |
| 177 | * We accept versions 1 and 2. Version 2 fields are unused because TCG |
| 178 | * does not implement DAWR*. |
| 179 | */ |
| 180 | if (hv_state.version > HV_GUEST_STATE_VERSION) { |
| 181 | return H_PARAMETER; |
| 182 | } |
| 183 | |
| 184 | if (hv_state.lpid == 0) { |
| 185 | return H_PARAMETER; |
| 186 | } |
| 187 | |
| 188 | spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1); |
| 189 | if (!spapr_cpu->nested_host_state) { |
| 190 | return H_NO_MEM; |
| 191 | } |
| 192 | |
| 193 | assert(env->spr[SPR_LPIDR] == 0); |
| 194 | assert(env->spr[SPR_DPDES] == 0); |
| 195 | nested_save_state(spapr_cpu->nested_host_state, cpu); |
| 196 | |
| 197 | len = sizeof(*regs); |
| 198 | regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, |
| 199 | MEMTXATTRS_UNSPECIFIED); |
| 200 | if (!regs || len != sizeof(*regs)) { |
| 201 | address_space_unmap(CPU(cpu)->as, regs, len, 0, false); |
| 202 | g_free(spapr_cpu->nested_host_state); |
| 203 | return H_P2; |
| 204 | } |
| 205 | |
| 206 | len = sizeof(l2_state.gpr); |
| 207 | assert(len == sizeof(regs->gpr)); |
| 208 | memcpy(l2_state.gpr, regs->gpr, len); |
| 209 | |
| 210 | l2_state.lr = regs->link; |
| 211 | l2_state.ctr = regs->ctr; |
| 212 | l2_state.xer = regs->xer; |
| 213 | l2_state.cr = regs->ccr; |
| 214 | l2_state.msr = regs->msr; |
| 215 | l2_state.nip = regs->nip; |
| 216 | |
| 217 | address_space_unmap(CPU(cpu)->as, regs, len, len, false); |
| 218 | |
| 219 | l2_state.cfar = hv_state.cfar; |
| 220 | l2_state.lpidr = hv_state.lpid; |
| 221 | |
| 222 | lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; |
| 223 | lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); |
| 224 | lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; |
| 225 | lpcr &= ~LPCR_LPES0; |
| 226 | l2_state.lpcr = lpcr & pcc->lpcr_mask; |
| 227 | |
| 228 | l2_state.pcr = hv_state.pcr; |
| 229 | /* hv_state.amor is not used */ |
| 230 | l2_state.dpdes = hv_state.dpdes; |
| 231 | l2_state.hfscr = hv_state.hfscr; |
| 232 | /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ |
| 233 | l2_state.srr0 = hv_state.srr0; |
| 234 | l2_state.srr1 = hv_state.srr1; |
| 235 | l2_state.sprg0 = hv_state.sprg[0]; |
| 236 | l2_state.sprg1 = hv_state.sprg[1]; |
| 237 | l2_state.sprg2 = hv_state.sprg[2]; |
| 238 | l2_state.sprg3 = hv_state.sprg[3]; |
| 239 | l2_state.pidr = hv_state.pidr; |
| 240 | l2_state.ppr = hv_state.ppr; |
| 241 | l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset; |
| 242 | |
| 243 | /* |
| 244 | * Switch to the nested guest environment and start the "hdec" timer. |
| 245 | */ |
| 246 | nested_load_state(cpu, &l2_state); |
| 247 | |
| 248 | hdec = hv_state.hdec_expiry - now; |
| 249 | cpu_ppc_hdecr_init(env); |
| 250 | cpu_ppc_store_hdecr(env, hdec); |
| 251 | |
| 252 | /* |
| 253 | * The hv_state.vcpu_token is not needed. It is used by the KVM |
| 254 | * implementation to remember which L2 vCPU last ran on which physical |
| 255 | * CPU so as to invalidate process scope translations if it is moved |
| 256 | * between physical CPUs. For now TLBs are always flushed on L1<->L2 |
| 257 | * transitions so this is not a problem. |
| 258 | * |
| 259 | * Could validate that the same vcpu_token does not attempt to run on |
| 260 | * different L1 vCPUs at the same time, but that would be a L1 KVM bug |
| 261 | * and it's not obviously worth a new data structure to do it. |
| 262 | */ |
| 263 | |
| 264 | spapr_cpu->in_nested = true; |
| 265 | |
| 266 | /* |
| 267 | * The spapr hcall helper sets env->gpr[3] to the return value, but at |
| 268 | * this point the L1 is not returning from the hcall but rather we |
| 269 | * start running the L2, so r3 must not be clobbered, so return env->gpr[3] |
| 270 | * to leave it unchanged. |
| 271 | */ |
| 272 | return env->gpr[3]; |
| 273 | } |
| 274 | |
| 275 | void spapr_exit_nested(PowerPCCPU *cpu, int excp) |
| 276 | { |
| 277 | CPUPPCState *env = &cpu->env; |
| 278 | SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); |
| 279 | struct nested_ppc_state l2_state; |
| 280 | target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; |
| 281 | target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; |
| 282 | target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr; |
| 283 | struct kvmppc_hv_guest_state *hvstate; |
| 284 | struct kvmppc_pt_regs *regs; |
| 285 | hwaddr len; |
| 286 | |
| 287 | assert(spapr_cpu->in_nested); |
| 288 | |
| 289 | nested_save_state(&l2_state, cpu); |
| 290 | hsrr0 = env->spr[SPR_HSRR0]; |
| 291 | hsrr1 = env->spr[SPR_HSRR1]; |
| 292 | hdar = env->spr[SPR_HDAR]; |
| 293 | hdsisr = env->spr[SPR_HDSISR]; |
| 294 | asdr = env->spr[SPR_ASDR]; |
| 295 | |
| 296 | /* |
| 297 | * Switch back to the host environment (including for any error). |
| 298 | */ |
| 299 | assert(env->spr[SPR_LPIDR] != 0); |
| 300 | nested_load_state(cpu, spapr_cpu->nested_host_state); |
| 301 | env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */ |
| 302 | |
| 303 | cpu_ppc_hdecr_exit(env); |
| 304 | |
| 305 | spapr_cpu->in_nested = false; |
| 306 | |
| 307 | g_free(spapr_cpu->nested_host_state); |
| 308 | spapr_cpu->nested_host_state = NULL; |
| 309 | |
| 310 | len = sizeof(*hvstate); |
| 311 | hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, |
| 312 | MEMTXATTRS_UNSPECIFIED); |
| 313 | if (len != sizeof(*hvstate)) { |
| 314 | address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); |
| 315 | env->gpr[3] = H_PARAMETER; |
| 316 | return; |
| 317 | } |
| 318 | |
| 319 | hvstate->cfar = l2_state.cfar; |
| 320 | hvstate->lpcr = l2_state.lpcr; |
| 321 | hvstate->pcr = l2_state.pcr; |
| 322 | hvstate->dpdes = l2_state.dpdes; |
| 323 | hvstate->hfscr = l2_state.hfscr; |
| 324 | |
| 325 | if (excp == POWERPC_EXCP_HDSI) { |
| 326 | hvstate->hdar = hdar; |
| 327 | hvstate->hdsisr = hdsisr; |
| 328 | hvstate->asdr = asdr; |
| 329 | } else if (excp == POWERPC_EXCP_HISI) { |
| 330 | hvstate->asdr = asdr; |
| 331 | } |
| 332 | |
| 333 | /* HEIR should be implemented for HV mode and saved here. */ |
| 334 | hvstate->srr0 = l2_state.srr0; |
| 335 | hvstate->srr1 = l2_state.srr1; |
| 336 | hvstate->sprg[0] = l2_state.sprg0; |
| 337 | hvstate->sprg[1] = l2_state.sprg1; |
| 338 | hvstate->sprg[2] = l2_state.sprg2; |
| 339 | hvstate->sprg[3] = l2_state.sprg3; |
| 340 | hvstate->pidr = l2_state.pidr; |
| 341 | hvstate->ppr = l2_state.ppr; |
| 342 | |
| 343 | /* Is it okay to specify write length larger than actual data written? */ |
| 344 | address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); |
| 345 | |
| 346 | len = sizeof(*regs); |
| 347 | regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, |
| 348 | MEMTXATTRS_UNSPECIFIED); |
| 349 | if (!regs || len != sizeof(*regs)) { |
| 350 | address_space_unmap(CPU(cpu)->as, regs, len, 0, true); |
| 351 | env->gpr[3] = H_P2; |
| 352 | return; |
| 353 | } |
| 354 | |
| 355 | len = sizeof(env->gpr); |
| 356 | assert(len == sizeof(regs->gpr)); |
| 357 | memcpy(regs->gpr, l2_state.gpr, len); |
| 358 | |
| 359 | regs->link = l2_state.lr; |
| 360 | regs->ctr = l2_state.ctr; |
| 361 | regs->xer = l2_state.xer; |
| 362 | regs->ccr = l2_state.cr; |
| 363 | |
| 364 | if (excp == POWERPC_EXCP_MCHECK || |
| 365 | excp == POWERPC_EXCP_RESET || |
| 366 | excp == POWERPC_EXCP_SYSCALL) { |
| 367 | regs->nip = l2_state.srr0; |
| 368 | regs->msr = l2_state.srr1 & env->msr_mask; |
| 369 | } else { |
| 370 | regs->nip = hsrr0; |
| 371 | regs->msr = hsrr1 & env->msr_mask; |
| 372 | } |
| 373 | |
| 374 | /* Is it okay to specify write length larger than actual data written? */ |
| 375 | address_space_unmap(CPU(cpu)->as, regs, len, len, true); |
| 376 | } |
| 377 | |
| 378 | void spapr_register_nested(void) |
| 379 | { |
| 380 | spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); |
| 381 | spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); |
| 382 | spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); |
| 383 | spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); |
| 384 | } |
| 385 | #else |
| 386 | void spapr_exit_nested(PowerPCCPU *cpu, int excp) |
| 387 | { |
| 388 | g_assert_not_reached(); |
| 389 | } |
| 390 | |
/*
 * Build without CONFIG_TCG: there are no nested-HV hypercall handlers to
 * register, so this is intentionally a no-op.
 */
void spapr_register_nested(void)
{
    /* Nothing to register. */
}
| 395 | #endif |