Add AArch64 (ARMv8) PMU support.

 * armpmu_lib.h: rdtsc32() reads PMCCNTR_EL0 on AArch64; new enable_pmu(),
   read_pmu() and disable_pmu() helpers drive event counter 0.
 * ko/enable_arm_pmu.c: enable/disable EL0 access to the PMU and the cycle
   counter on AArch64 in addition to ARMv7-A.
 * perf_arm_pmu.c: run the loop a second time and report the delta of
   event 0x008.

Review fixes folded into this revision:
 * Counters and interrupts are switched off through PMCNTENCLR_EL0 and
   PMINTENCLR_EL1; the *SET registers ignore zero bits.
 * disable_pmu() no longer uses logical && where a bit mask was intended.
 * disable_cpu_counters() clears PMCR_EL0.E with "& ~" instead of "| ~".
 * Bit-31 constants use 1U, and every msr operand is 64 bits wide.
 * rdtsc32() hits #error instead of falling off the end without a return.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, so context lines may not match the
target tree byte for byte. Apply with whitespace-insensitive matching
("git apply --ignore-whitespace" or "patch -l"). The blob ids on the
"index" lines are carried over unchanged and do not reflect the fixes.

diff --git a/armpmu_lib.h b/armpmu_lib.h
index 1abca70..214366a 100644
--- a/armpmu_lib.h
+++ b/armpmu_lib.h
@@ -4,13 +4,77 @@
 static inline uint32_t
 rdtsc32(void)
 {
-#if defined(__GNUC__) && defined(__ARM_ARCH_7A__)
+#if defined(__GNUC__) && defined(__aarch64__)
+        /* MRS transfers a full X register: read 64 bits, return the low 32. */
+        uint64_t r = 0;
+        asm volatile("mrs %0, pmccntr_el0" : "=r" (r));
+        return (uint32_t)r;
+#elif defined(__GNUC__) && defined(__ARM_ARCH_7A__)
         uint32_t r = 0;
         asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(r) );
         return r;
 #else
 #error Unsupported architecture/compiler!
 #endif
 }
 
+#define ARMV8_PMEVTYPER_P   (1U << 31) /* EL1 modes filtering bit */
+#define ARMV8_PMEVTYPER_U   (1U << 30) /* EL0 filtering bit */
+#define ARMV8_PMEVTYPER_NSK (1U << 29) /* Non-secure EL1 (kernel) modes filtering bit */
+#define ARMV8_PMEVTYPER_NSU (1U << 28) /* Non-secure User mode filtering bit */
+#define ARMV8_PMEVTYPER_NSH (1U << 27) /* Non-secure Hyp modes filtering bit */
+#define ARMV8_PMEVTYPER_M   (1U << 26) /* Secure EL3 filtering bit */
+#define ARMV8_PMEVTYPER_MT  (1U << 25) /* Multithreading */
+#define ARMV8_PMEVTYPER_EVTCOUNT_MASK 0x3ff
+
+/* Program event counter 0 to count event evtCount (low 10 bits) and start it. */
+static inline void
+enable_pmu(uint32_t evtCount)
+{
+#if defined(__GNUC__) && defined(__aarch64__)
+        /* MSR consumes a full X register, so hand it a 64-bit operand. */
+        uint64_t evt = evtCount & ARMV8_PMEVTYPER_EVTCOUNT_MASK;
+
+        asm volatile("isb");
+        /* Just use counter 0 */
+        asm volatile("msr pmevtyper0_el0, %0" : : "r" (evt));
+        /* PMCNTENSET_EL0 is write-one-to-set; bit 0 enables event counter 0. */
+        asm volatile("msr pmcntenset_el0, %0" : : "r" ((uint64_t)1));
+        /* Synchronize so the counter is running before the caller samples it. */
+        asm volatile("isb");
+#else
+#error Unsupported architecture/compiler!
+#endif
+}
+
+/* Return the low 32 bits of event counter 0 (PMEVCNTR0_EL0). */
+static inline uint32_t
+read_pmu(void)
+{
+#if defined(__GNUC__) && defined(__aarch64__)
+        uint64_t r = 0;
+
+        asm volatile("mrs %0, pmevcntr0_el0" : "=r" (r));
+        return (uint32_t)r;
+#else
+#error Unsupported architecture/compiler!
+#endif
+}
+
+/* Stop event counter 0; evtCount is unused and kept only for existing callers. */
+static inline void
+disable_pmu(uint32_t evtCount)
+{
+#if defined(__GNUC__) && defined(__aarch64__)
+        /*
+         * PMCNTENSET_EL0 ignores zero bits, so the counter can only be
+         * stopped by writing its bit to PMCNTENCLR_EL0.
+         */
+        (void)evtCount;
+        asm volatile("msr pmcntenclr_el0, %0" : : "r" ((uint64_t)1));
+#else
+#error Unsupported architecture/compiler!
+#endif
+}
+
 #endif /* ARMPMU_LIB_H */
diff --git a/ko/enable_arm_pmu.c b/ko/enable_arm_pmu.c
index 34a1d20..28cfd35 100644
--- a/ko/enable_arm_pmu.c
+++ b/ko/enable_arm_pmu.c
@@ -10,26 +10,77 @@
 #define DRVR_NAME "enable_arm_pmu"
 
-#if !defined(__arm__)
+#if !defined(__arm__) && !defined(__aarch64__)
 #error Module can only be compiled on ARM machines.
 #endif
 
 /** -- Initialization & boilerplate ---------------------------------------- */
 
+#define ARMV8_PMCR_MASK    0x3f
+#define ARMV8_PMCR_E       (1 << 0) /* Enable all counters */
+#define ARMV8_PMCR_P       (1 << 1) /* Reset all counters */
+#define ARMV8_PMCR_C       (1 << 2) /* Cycle counter reset */
+#define ARMV8_PMCR_D       (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMCR_X       (1 << 4) /* Export to ETM */
+#define ARMV8_PMCR_DP      (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV8_PMCR_N_SHIFT 11       /* Number of counters supported */
+#define ARMV8_PMCR_N_MASK  0x1f
+
+#define ARMV8_PMUSERENR_EN_EL0 (1 << 0) /* EL0 access enable */
+#define ARMV8_PMUSERENR_CR     (1 << 2) /* Cycle counter read enable */
+#define ARMV8_PMUSERENR_ER     (1 << 3) /* Event counter read enable */
+
+#define ARMV8_PMCNTENSET_EL0_ENABLE (1U << 31) /* Cycle counter enable bit */
+#define ARMV8_PMINTENCLR_EL1_C      (1U << 31) /* Cycle counter overflow interrupt bit */
 #define PERF_DEF_OPTS (1 | 16)
 #define PERF_OPT_RESET_CYCLES (2 | 4)
 #define PERF_OPT_DIV64 (8)
 
+/* Read PMCR_EL0 and return its low 32 bits. */
+static inline u32 armv8pmu_pmcr_read(void)
+{
+        u64 val = 0;
+
+        asm volatile("mrs %0, pmcr_el0" : "=r" (val));
+        return (u32)val;
+}
+
+/* Write val, restricted to ARMV8_PMCR_MASK, to PMCR_EL0 after an ISB. */
+static inline void armv8pmu_pmcr_write(u32 val)
+{
+        val &= ARMV8_PMCR_MASK;
+        isb();
+        asm volatile("msr pmcr_el0, %0" : : "r" ((u64)val));
+}
+
 static void
 enable_cpu_counters(void* data)
 {
         printk(KERN_INFO "[" DRVR_NAME "] enabling user-mode PMU access on CPU #%d",
                 smp_processor_id());
 
+#if defined(__aarch64__)
+        /* Enable user-mode access to counters. */
+        asm volatile("msr pmuserenr_el0, %0" : : "r"((u64)(ARMV8_PMUSERENR_EN_EL0|ARMV8_PMUSERENR_ER|ARMV8_PMUSERENR_CR)));
+        /* Initialize & Reset PMNC: C and P bits. */
+        armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
+        /*
+         * Keep the cycle-counter overflow interrupt masked. PMINTENSET_EL1
+         * ignores zero bits, so the mask has to be written to PMINTENCLR_EL1.
+         */
+        asm volatile("msr pmintenclr_el1, %0" : : "r" ((u64)ARMV8_PMINTENCLR_EL1_C));
+        /* PMCNTENSET_EL0 bit 31 enables the cycle counter. */
+        asm volatile("msr pmcntenset_el0, %0" : : "r" ((u64)ARMV8_PMCNTENSET_EL0_ENABLE));
+        /* start */
+        armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
+#elif defined(__ARM_ARCH_7A__)
         /* Enable user-mode access to counters. */
         asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
         /* Program PMU and enable all counters */
         asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(PERF_DEF_OPTS));
         asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+#else
+#error Unsupported Architecture
+#endif
 }
 
 static void
@@ -38,11 +89,25 @@ disable_cpu_counters(void* data)
         printk(KERN_INFO "[" DRVR_NAME "] disabling user-mode PMU access on CPU #%d",
                 smp_processor_id());
 
+#if defined(__aarch64__)
+        /*
+         * PMCNTENSET_EL0 ignores zero bits; the cycle counter is switched
+         * off by writing its bit to PMCNTENCLR_EL0.
+         */
+        asm volatile("msr pmcntenclr_el0, %0" : : "r" ((u64)ARMV8_PMCNTENSET_EL0_ENABLE));
+        /* Program PMU and disable all counters */
+        armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
+        /* disable user-mode access to counters. */
+        asm volatile("msr pmuserenr_el0, %0" : : "r"((u64)0));
+#elif defined(__ARM_ARCH_7A__)
         /* Program PMU and disable all counters */
         asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(0));
         asm volatile("mcr p15, 0, %0, c9, c12, 2" :: "r"(0x8000000f));
         /* Disable user-mode access to counters. */
         asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(0));
+#else
+#error Unsupported Architecture
+#endif
 }
 
 static int __init
diff --git a/perf_arm_pmu.c b/perf_arm_pmu.c
index 0e59d54..1689e8a 100644
--- a/perf_arm_pmu.c
+++ b/perf_arm_pmu.c
@@ -21,6 +21,8 @@ main(int ac, char **av)
 {
         uint32_t time_start = 0;
         uint32_t time_end = 0;
+        uint32_t cnt_start = 0;
+        uint32_t cnt_end = 0;
 
         int *a = NULL;
         int *b = NULL;
@@ -45,6 +47,14 @@ main(int ac, char **av)
         time_end = rdtsc32();
         printf("%s: done. sum = %d; time delta = %u\n", av[0], sum, time_end - time_start);
 
+        printf("%s: beginning loop\n", av[0]);
+        enable_pmu(0x008);
+        cnt_start = read_pmu();
+        sum = loop(a, b, len);
+        cnt_end = read_pmu();
+        disable_pmu(0x008);
+        printf("%s: done. sum = %d; event 0x%03x delta = %u\n", av[0], sum, 0x008, cnt_end - cnt_start);
+
         free(a); free(b);
         return 0;
 }