ppc64: Rudimentary Support for extra page sizes on server CPUs More recent Power server chips (i.e. based on the 64 bit hash MMU) support more than just the traditional 4k and 16M page sizes. This can get quite complicated, because which page sizes are supported, which combinations are supported within an MMU segment and how these page sizes are encoded both in the SLB entry and the hash PTE can vary depending on the CPU model (they are not specified by the architecture). In addition the firmware or hypervisor may not permit use of certain page sizes, for various reasons. Whether various page sizes are supported on KVM, for example, depends on whether the PR or HV variant of KVM is in use, and on the page size of the memory backing the guest's RAM. This patch adds information to the CPUState and cpu defs to describe the supported page sizes and encodings. Since TCG does not yet support any extended page sizes, we just set this to NULL in the static CPU definitions, expanding this to the default 4k and 16M page sizes when we initialize the cpu state. When using KVM, however, we instead determine available page sizes using the new KVM_PPC_GET_SMMU_INFO call. For old kernels without that call, we use some defaults, with some guesswork which should do the right thing for existing HV and PR implementations. The fallback might not be correct for future versions, but that's ok, because they'll have KVM_PPC_GET_SMMU_INFO. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Alexander Graf <agraf@suse.de>

commit: 4656e1f01289cc3aa20986deb6a407165826abe5 [log] [tgz]
author: Benjamin Herrenschmidt <benh@kernel.crashing.org> Mon Jun 18 19:56:25 2012 +0000
committer: Alexander Graf <agraf@suse.de> Sun Jun 24 01:04:44 2012 +0200
tree: a8eec4470ce0817b5c38f01b1e365d759419c3bb
parent: 77c2cf33fe8d272e5375b55c588202a18af0e27c [diff] [blame]
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index c09cc39..b6ef72d 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c

@@ -18,6 +18,7 @@
 #include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/vfs.h>
 
 #include <linux/kvm.h>
 
@@ -167,10 +168,217 @@
     return 0;
 }
 
+
+#if defined(TARGET_PPC64)
+static void kvm_get_fallback_smmu_info(CPUPPCState *env,
+                                       struct kvm_ppc_smmu_info *info)
+{
+    memset(info, 0, sizeof(*info));
+
+    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
+     * need to "guess" what the supported page sizes are.
+     *
+     * For that to work we make a few assumptions:
+     *
+     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
+     *   KVM which only supports 4K and 16M pages, but supports them
+     *   regardless of the backing store characteritics. We also don't
+     *   support 1T segments.
+     *
+     *   This is safe as if HV KVM ever supports that capability or PR
+     *   KVM grows supports for more page/segment sizes, those versions
+     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
+     *   will not hit this fallback
+     *
+     * - Else we are running HV KVM. This means we only support page
+     *   sizes that fit in the backing store. Additionally we only
+     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
+     *   P7 encodings for the SLB and hash table. Here too, we assume
+     *   support for any newer processor will mean a kernel that
+     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
+     *   this fallback.
+     */
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* No flags */
+        info->flags = 0;
+        info->slb_size = 64;
+
+        /* Standard 4k base page size segment */
+        info->sps[0].page_shift = 12;
+        info->sps[0].slb_enc = 0;
+        info->sps[0].enc[0].page_shift = 12;
+        info->sps[0].enc[0].pte_enc = 0;
+
+        /* Standard 16M large page size segment */
+        info->sps[1].page_shift = 24;
+        info->sps[1].slb_enc = SLB_VSID_L;
+        info->sps[1].enc[0].page_shift = 24;
+        info->sps[1].enc[0].pte_enc = 0;
+    } else {
+        int i = 0;
+
+        /* HV KVM has backing store size restrictions */
+        info->flags = KVM_PPC_PAGE_SIZES_REAL;
+
+        if (env->mmu_model & POWERPC_MMU_1TSEG) {
+            info->flags |= KVM_PPC_1T_SEGMENTS;
+        }
+
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->slb_size = 32;
+        } else {
+            info->slb_size = 64;
+        }
+
+        /* Standard 4k base page size segment */
+        info->sps[i].page_shift = 12;
+        info->sps[i].slb_enc = 0;
+        info->sps[i].enc[0].page_shift = 12;
+        info->sps[i].enc[0].pte_enc = 0;
+        i++;
+
+        /* 64K on MMU 2.06 */
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->sps[i].page_shift = 16;
+            info->sps[i].slb_enc = 0x110;
+            info->sps[i].enc[0].page_shift = 16;
+            info->sps[i].enc[0].pte_enc = 1;
+            i++;
+        }
+
+        /* Standard 16M large page size segment */
+        info->sps[i].page_shift = 24;
+        info->sps[i].slb_enc = SLB_VSID_L;
+        info->sps[i].enc[0].page_shift = 24;
+        info->sps[i].enc[0].pte_enc = 0;
+    }
+}
+
+static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
+{
+    int ret;
+
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+        if (ret == 0) {
+            return;
+        }
+    }
+
+    kvm_get_fallback_smmu_info(env, info);
+}
+
+static long getrampagesize(void)
+{
+    struct statfs fs;
+    int ret;
+
+    if (!mem_path) {
+        /* guest RAM is backed by normal anonymous pages */
+        return getpagesize();
+    }
+
+    do {
+        ret = statfs(mem_path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
+                strerror(errno));
+        exit(1);
+    }
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        /* Explicit mempath, but it's ordinary pages */
+        return getpagesize();
+    }
+
+    /* It's hugepage, return the huge page size */
+    return fs.f_bsize;
+}
+
+static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
+{
+    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
+        return true;
+    }
+
+    return (1ul << shift) <= rampgsize;
+}
+
+static void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+    static struct kvm_ppc_smmu_info smmu_info;
+    static bool has_smmu_info;
+    long rampagesize;
+    int iq, ik, jq, jk;
+
+    /* We only handle page sizes for 64-bit server guests for now */
+    if (!(env->mmu_model & POWERPC_MMU_64)) {
+        return;
+    }
+
+    /* Collect MMU info from kernel if not already */
+    if (!has_smmu_info) {
+        kvm_get_smmu_info(env, &smmu_info);
+        has_smmu_info = true;
+    }
+
+    rampagesize = getrampagesize();
+
+    /* Convert to QEMU form */
+    memset(&env->sps, 0, sizeof(env->sps));
+
+    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
+        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
+        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
+
+        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                 ksps->page_shift)) {
+            continue;
+        }
+        qsps->page_shift = ksps->page_shift;
+        qsps->slb_enc = ksps->slb_enc;
+        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
+            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                     ksps->enc[jk].page_shift)) {
+                continue;
+            }
+            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
+            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
+            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
+                break;
+            }
+        }
+        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
+            break;
+        }
+    }
+    env->slb_nr = smmu_info.slb_size;
+    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
+        env->mmu_model |= POWERPC_MMU_1TSEG;
+    } else {
+        env->mmu_model &= ~POWERPC_MMU_1TSEG;
+    }
+}
+#else /* defined (TARGET_PPC64) */
+
+static inline void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+}
+
+#endif /* !defined (TARGET_PPC64) */
+
 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 {
     int ret;
 
+    /* Gather server mmu info from KVM and update the CPU state */
+    kvm_fixup_page_sizes(cenv);
+
+    /* Synchronize sregs with kvm */
     ret = kvm_arch_sync_sregs(cenv);
     if (ret) {
         return ret;
commit	4656e1f01289cc3aa20986deb6a407165826abe5	[log] [tgz]
author	Benjamin Herrenschmidt <benh@kernel.crashing.org>	Mon Jun 18 19:56:25 2012 +0000
committer	Alexander Graf <agraf@suse.de>	Sun Jun 24 01:04:44 2012 +0200
tree	a8eec4470ce0817b5c38f01b1e365d759419c3bb
parent	77c2cf33fe8d272e5375b55c588202a18af0e27c [diff] [blame]