Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP2:GA
oprofile
oprofile-add-support-for-ibm-power-event-codes-...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File oprofile-add-support-for-ibm-power-event-codes-longer-than-sizeof-int.patch of Package oprofile
From: Maynard Johnson <maynardj@us.ibm.com> Subject: Add support for IBM Power event codes longer than sizeof int Date: Fri Nov 21 15:41:55 2014 -0600 Git-commit: a7d08172d5738f6e9b3e3ea68e585c1585f5ca21 References: FATE#319565, bsc#965789 Signed-off-by: Tony Jones <tonyj@suse.de> Add support for IBM Power event codes longer than sizeof int A small number of events on newer IBM Power processors have event codes that are larger than sizeof(int). Rather than change the width of the event code everywhere to be a long int (which would include having to change the sample file format), we have defined some internal-use-only unit masks for those events. These unit masks are not shown in the ophelp output, and IBM Power users should never use them in event specifications; instead, they should use the usual 'null' unit mask value of '0x0' in event specifications -- e.g., PM_L1MISS_LAT_EXC_256:0x0:0:1 See libpe_utils/op_pe_utils.cpp:_get_event_code for how these unit masks are used. Signed-off-by: Maynard Johnson <maynardj@us.ibm.com> diff --git a/events/ppc64/power8/events b/events/ppc64/power8/events index cc1163a..012ca89 100644 --- a/events/ppc64/power8/events +++ b/events/ppc64/power8/events @@ -451,10 +451,10 @@ event:0x30a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS0 : VS0 IS event:0x30aa counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS1 : VS1 ISU reject event:0x38a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VSU : ISU event:0x30b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISYNC : Isync count per thread -event:0x200301ea counters:2 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_1024 : Reload latency exceeded 1024 cyc -event:0x200401ec counters:3 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_2048 : Reload latency exceeded 2048 cyc -event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_256 : Reload latency exceeded 256 cyc -event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_32 : Reload 
latency exceeded 32 cyc +event:0x200301ea counters:2 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_1024 : Reload latency exceeded 1024 cyc +event:0x200401ec counters:3 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_2048 : Reload latency exceeded 2048 cyc +event:0x200101e8 counters:0 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_256 : Reload latency exceeded 256 cyc +event:0x200201e6 counters:1 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_32 : Reload latency exceeded 32 cyc event:0x26086 counters:1 um:zero minimum:10000 name:PM_L1PF_L2MEMACC : valid when first beat of data comes in for an L1pref where data came from mem(or L4) event:0x1002c counters:0 um:zero minimum:10000 name:PM_L1_DCACHE_RELOADED_ALL : L1 data cache reloaded for demand or prefetch . event:0x408c counters:0,1,2,3 um:zero minimum:10000 name:PM_L1_DEMAND_WRITE : Instruction Demand sectors wriittent into IL1 @@ -879,10 +879,10 @@ event:0x10054 counters:0 um:zero minimum:10000 name:PM_PUMP_CPRED : Pump predict event:0x40052 counters:3 um:zero minimum:10000 name:PM_PUMP_MPRED : Pump Mis prediction Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate). event:0x16081 counters:0 um:zero minimum:10000 name:PM_RC0_ALLOC : 0.0 event:0x16080 counters:0 um:zero minimum:10000 name:PM_RC0_BUSY : RC mach 0 Busy. 
Used by PMU to sample ave RC livetime(mach0 used as sample point) -event:0x200301ea counters:2 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_1024 : Reload latency exceeded 1024 cyc -event:0x200401ec counters:3 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_2048 : Threshold counter exceeded a value of 2048 -event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_256 : Threshold counter exceed a count of 256 -event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_32 : Reload latency exceeded 32 cyc +event:0x200301ea counters:2 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_1024 : Reload latency exceeded 1024 cyc +event:0x200401ec counters:3 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_2048 : Threshold counter exceeded a value of 2048 +event:0x200101e8 counters:0 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_256 : Threshold counter exceed a count of 256 +event:0x200201e6 counters:1 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_32 : Reload latency exceeded 32 cyc event:0x36088 counters:2 um:zero minimum:10000 name:PM_RC_USAGE : Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running event:0x34808e counters:3 um:zero minimum:10000 name:PM_RD_CLEARING_SC : rd clearing sc event:0x34808c counters:3 um:zero minimum:10000 name:PM_RD_FORMING_SC : rd forming sc diff --git a/events/ppc64/power8/unit_masks b/events/ppc64/power8/unit_masks index 988dd41..203af97 100644 --- a/events/ppc64/power8/unit_masks +++ b/events/ppc64/power8/unit_masks @@ -5,5 +5,13 @@ # # ppc64 POWER8 possible unit masks # +# NOTE: The 'rc_machine' and 'L1_latency' unit masks are for internal use only, +# to workaround oprofile's 32-bit limitation for event codes. +# See libpe_utils/op_pe_utils.cpp:_get_event_code for how these codes are +# used. 
name:zero type:mandatory default:0x0 0x0 No unit mask +name:rc_machine type:mandatory default:0xde + 0xde Thresholdable start/stop for rc machine for sampled instruction +name:L1_latency type:mandatory default:0x67 + 0x67 Thresholdable start/stop for L1 sampled instruction load miss/reload diff --git a/libop/op_events.c b/libop/op_events.c index 8bfd3d2..29dc2f3 100644 --- a/libop/op_events.c +++ b/libop/op_events.c @@ -1081,12 +1081,21 @@ static int _is_um_valid_bitmask(struct op_event * event, u32 passed_um) return retval; } -int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type) +static int _is_ppc64_cpu_type(op_cpu cpu_type) { + char const * cpu_name = op_get_cpu_name(cpu_type); + if (strncmp(cpu_name, "ppc64/power", strlen("ppc64/power")) == 0) + return 1; + else + return 0; +} + +int op_check_events(char * evt_name, int ctr, u32 nr, u32 um, op_cpu cpu_type) { int ret = OP_INVALID_EVENT; size_t i; u32 ctr_mask = 1 << ctr; struct list_head * pos; + int ibm_power_proc = _is_ppc64_cpu_type(cpu_type); load_events(cpu_type); @@ -1095,6 +1104,11 @@ int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type) if (event->val != nr) continue; + // Why do we have to do this, since event codes are supposed to be unique? + // See the big comment below. + if (ibm_power_proc && strcmp(evt_name, event->name)) + continue; + ret = OP_OK_EVENT; if ((event->counter_mask & ctr_mask) == 0) @@ -1108,7 +1122,28 @@ int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type) if (event->unit->um[i].value == um) break; } - if (i == event->unit->num) + /* A small number of events on the IBM Power8 processor have real event + * codes that are larger than sizeof(int). Rather than change the width of + * the event code everywhere to be a long int (which would include having to + * change the sample file format), we have defined some internal-use-only + * unit masks for those events. 
In oprofile's power8 events file, we have + * truncated those event codes to integer size, and the truncated bits are + * used as a unit mask value which is ORed into the event code by + * libpe_utils/op_pe_utils.cpp:_get_event_code(). This technique allowed + * us to handle this situation with minimal code perturbation. The one + * downside is that the truncated event codes are not unique. So in this + * function, where we're searching for events by 'nr' (i.e., the event code), + * we have to also make sure the name matches. + * + * If the user gives us an event specification such as: + * PM_L1MISS_LAT_EXC_256:0x0:1:1 + * the above code will actually find a non-zero unit mask for this event and + * we'd normally fail at this point since the user passed '0x0' for a unit mask. + * But we don't expose these internal-use-only UMs to the user, so there's + * no way for them to know about it or to try to use it in their event spec; + * thus, we handle it below. + */ + if ((i == event->unit->num) && !((um == 0) && ibm_power_proc)) ret |= OP_INVALID_UM; } diff --git a/libop/op_events.h b/libop/op_events.h index be609f7..ec345e5 100644 --- a/libop/op_events.h +++ b/libop/op_events.h @@ -113,7 +113,7 @@ enum op_event_check { * * \sa op_cpu, OP_EVENTS_OK */ -int op_check_events(int ctr, u32 event, u32 um, op_cpu cpu_type); +int op_check_events(char * name, int ctr, u32 event, u32 um, op_cpu cpu_type); /** * free memory used by any call to above function. 
Need to be called only once diff --git a/libpe_utils/op_pe_utils.cpp b/libpe_utils/op_pe_utils.cpp index 8c69894..c5b6ee7 100644 --- a/libpe_utils/op_pe_utils.cpp +++ b/libpe_utils/op_pe_utils.cpp @@ -379,7 +379,7 @@ out: static void _get_event_code(operf_event_t * event, op_cpu cpu_type) { FILE * fp; - char oprof_event_code[9]; + char oprof_event_code[11]; string command; u64 base_code, config; char buf[20]; @@ -412,7 +412,6 @@ static void _get_event_code(operf_event_t * event, op_cpu cpu_type) #if defined(__i386__) || defined(__x86_64__) - char mask[OP_MAX_UM_NAME_LEN]; // Setup EventSelct[11:8] field for AMD const char * vendor_AMD = "AuthenticAMD"; if (op_is_cpu_vendor((char *)vendor_AMD)) { @@ -422,8 +421,10 @@ static void _get_event_code(operf_event_t * event, op_cpu cpu_type) // Setup EventSelct[7:0] field config |= base_code & 0xFFULL; - - // Setup unitmask field +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc64__) + char mask[OP_MAX_UM_NAME_LEN]; +// Setup unitmask field handle_named_um: if (event->um_name[0]) { command = OP_BINDIR; @@ -489,7 +490,12 @@ handle_named_um: strncpy(event->um_name, mask, OP_MAX_UM_NAME_LEN - 1); goto handle_named_um; } +#if defined(__powerpc64__) + config = base_code; + config |= ((event->evt_um & 0xFFULL) << 32); +#else config |= ((event->evt_um & 0xFFULL) << 8); +#endif } else { config |= ((event->evt_um & 0xFFULL) << 8); } @@ -505,6 +511,7 @@ handle_named_um: } } event->evt_code = config; + cverb << vdebug << "Final event code is " << hex << event->evt_code << endl; } #if PPC64_ARCH diff --git a/utils/ophelp.c b/utils/ophelp.c index e38e417..a80fec8 100644 --- a/utils/ophelp.c +++ b/utils/ophelp.c @@ -180,7 +180,7 @@ static void check_event(struct parsed_event * pev, if (pev->unit_mask_name) ret = 0; else - ret = op_check_events(0, event->val, pev->unit_mask, cpu_type); + ret = op_check_events(pev->name, 0, event->val, pev->unit_mask, cpu_type); if (ret & OP_INVALID_UM) { fprintf(stderr, "Invalid 
unit mask 0x%x for event %s\n",
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor