File oprofile-add-support-for-ibm-power-event-codes-longer-than-sizeof-int.patch of Package oprofile

From: Maynard Johnson <maynardj@us.ibm.com>
Subject: Add support for IBM Power event codes longer than sizeof int
Date: Fri Nov 21 15:41:55 2014 -0600
Git-commit: a7d08172d5738f6e9b3e3ea68e585c1585f5ca21
References: FATE#319565, bsc#965789
Signed-off-by: Tony Jones <tonyj@suse.de>

    Add support for IBM Power event codes longer than sizeof int
    
    A small number of events on newer IBM Power processors have event codes
    that are too wide to fit in an int. Rather than change the width of the
    event code everywhere to be a long int (which would include having to
    change the sample file format), we have defined some internal-use-only
    unit masks for those events. These unit masks are not shown in the ophelp
    output, and IBM Power users should never use them in event specifications;
    instead, they should use the usual 'null' unit mask value of '0x0' in event
    specifications -- e.g.,
           PM_L1MISS_LAT_EXC_256:0x0:0:1
    
    See libpe_utils/op_pe_utils.cpp:_get_event_code for how these unit masks are
    used.
    
    Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>

diff --git a/events/ppc64/power8/events b/events/ppc64/power8/events
index cc1163a..012ca89 100644
--- a/events/ppc64/power8/events
+++ b/events/ppc64/power8/events
@@ -451,10 +451,10 @@ event:0x30a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS0 : VS0 IS
 event:0x30aa counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS1 : VS1 ISU reject
 event:0x38a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VSU : ISU
 event:0x30b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISYNC : Isync count per thread
-event:0x200301ea counters:2 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_1024 : Reload latency exceeded 1024 cyc
-event:0x200401ec counters:3 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_2048 : Reload latency exceeded 2048 cyc
-event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_256 : Reload latency exceeded 256 cyc
-event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_32 : Reload latency exceeded 32 cyc
+event:0x200301ea counters:2 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_1024 : Reload latency exceeded 1024 cyc
+event:0x200401ec counters:3 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_2048 : Reload latency exceeded 2048 cyc
+event:0x200101e8 counters:0 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_256 : Reload latency exceeded 256 cyc
+event:0x200201e6 counters:1 um:L1_latency minimum:10000 name:PM_L1MISS_LAT_EXC_32 : Reload latency exceeded 32 cyc
 event:0x26086 counters:1 um:zero minimum:10000 name:PM_L1PF_L2MEMACC : valid when first beat of data comes in for an L1pref where data came from mem(or L4)
 event:0x1002c counters:0 um:zero minimum:10000 name:PM_L1_DCACHE_RELOADED_ALL : L1 data cache reloaded for demand or prefetch .
 event:0x408c counters:0,1,2,3 um:zero minimum:10000 name:PM_L1_DEMAND_WRITE : Instruction Demand sectors wriittent into IL1
@@ -879,10 +879,10 @@ event:0x10054 counters:0 um:zero minimum:10000 name:PM_PUMP_CPRED : Pump predict
 event:0x40052 counters:3 um:zero minimum:10000 name:PM_PUMP_MPRED : Pump Mis prediction Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
 event:0x16081 counters:0 um:zero minimum:10000 name:PM_RC0_ALLOC : 0.0
 event:0x16080 counters:0 um:zero minimum:10000 name:PM_RC0_BUSY : RC mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)
-event:0x200301ea counters:2 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_1024 : Reload latency exceeded 1024 cyc
-event:0x200401ec counters:3 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_2048 : Threshold counter exceeded a value of 2048
-event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_256 : Threshold counter exceed a count of 256
-event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_32 : Reload latency exceeded 32 cyc
+event:0x200301ea counters:2 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_1024 : Reload latency exceeded 1024 cyc
+event:0x200401ec counters:3 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_2048 : Threshold counter exceeded a value of 2048
+event:0x200101e8 counters:0 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_256 : Threshold counter exceed a count of 256
+event:0x200201e6 counters:1 um:rc_machine minimum:10000 name:PM_RC_LIFETIME_EXC_32 : Reload latency exceeded 32 cyc
 event:0x36088 counters:2 um:zero minimum:10000 name:PM_RC_USAGE : Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running
 event:0x34808e counters:3 um:zero minimum:10000 name:PM_RD_CLEARING_SC : rd clearing sc
 event:0x34808c counters:3 um:zero minimum:10000 name:PM_RD_FORMING_SC : rd forming sc
diff --git a/events/ppc64/power8/unit_masks b/events/ppc64/power8/unit_masks
index 988dd41..203af97 100644
--- a/events/ppc64/power8/unit_masks
+++ b/events/ppc64/power8/unit_masks
@@ -5,5 +5,13 @@
 #
 # ppc64 POWER8 possible unit masks
 #
+# NOTE: The 'rc_machine' and 'L1_latency' unit masks are for internal use only,
+#       to work around oprofile's 32-bit limitation for event codes.
+#       See libpe_utils/op_pe_utils.cpp:_get_event_code for how these codes are
+#       used.
 name:zero type:mandatory default:0x0
 	0x0 No unit mask
+name:rc_machine type:mandatory default:0xde
+	0xde Thresholdable start/stop for rc machine for sampled instruction
+name:L1_latency type:mandatory default:0x67
+	0x67 Thresholdable start/stop for L1 sampled instruction load miss/reload
diff --git a/libop/op_events.c b/libop/op_events.c
index 8bfd3d2..29dc2f3 100644
--- a/libop/op_events.c
+++ b/libop/op_events.c
@@ -1081,12 +1081,21 @@ static int _is_um_valid_bitmask(struct op_event * event, u32 passed_um)
 	return retval;
 }
 
-int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
+/* Return 1 when the name of cpu_type begins with "ppc64/power", else 0. */
+static int _is_ppc64_cpu_type(op_cpu cpu_type)
+{
+	static char const power_prefix[] = "ppc64/power";
+	char const * name = op_get_cpu_name(cpu_type);
+	return strncmp(name, power_prefix, sizeof(power_prefix) - 1) == 0;
+}
+
+int op_check_events(char * evt_name, int ctr, u32 nr, u32 um, op_cpu cpu_type)
 {
 	int ret = OP_INVALID_EVENT;
 	size_t i;
 	u32 ctr_mask = 1 << ctr;
 	struct list_head * pos;
+	int ibm_power_proc = _is_ppc64_cpu_type(cpu_type);
 
 	load_events(cpu_type);
 
@@ -1095,6 +1104,11 @@ int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
 		if (event->val != nr)
 			continue;
 
+		// Why do we have to do this, since event codes are supposed to be unique?
+		// See the big comment below.
+		if (ibm_power_proc && strcmp(evt_name, event->name))
+			continue;
+
 		ret = OP_OK_EVENT;
 
 		if ((event->counter_mask & ctr_mask) == 0)
@@ -1108,7 +1122,28 @@ int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
 				if (event->unit->um[i].value == um)
 					break;
 			}
-			if (i == event->unit->num)
+			/* A small number of events on the IBM Power8 processor have real event
+			 * codes that are larger than sizeof(int). Rather than change the width of
+			 * the event code everywhere to be a long int (which would include having to
+			 * change the sample file format), we have defined some internal-use-only
+			 * unit masks for those events. In oprofile's power8 events file, we have
+			 * truncated those event codes to integer size, and the truncated bits are
+			 * used as a unit mask value which is ORed into the event code by
+			 * libpe_utils/op_pe_utils.cpp:_get_event_code(). This technique allowed
+			 * us to handle this situation with minimal code perturbation.  The one
+			 * downside is that the truncated event codes are not unique.  So in this
+			 * function, where we're searching for events by 'nr' (i.e., the event code),
+			 * we have to also make sure the name matches.
+			 *
+			 * If the user gives us an event specification such as:
+			 *      PM_L1MISS_LAT_EXC_256:0x0:1:1
+			 * the above code will actually find a non-zero unit mask for this event and
+			 * we'd normally fail at this point since the user passed '0x0' for a unit mask.
+			 * But we don't expose these internal-use-only UMs to the user, so there's
+			 * no way for them to know about it or to try to use it in their event spec;
+			 * thus, we handle it below.
+			 */
+			if ((i == event->unit->num) && !((um == 0) && ibm_power_proc))
 				ret |= OP_INVALID_UM;
 		}
 
diff --git a/libop/op_events.h b/libop/op_events.h
index be609f7..ec345e5 100644
--- a/libop/op_events.h
+++ b/libop/op_events.h
@@ -113,7 +113,7 @@ enum op_event_check {
  *
  * \sa op_cpu, OP_EVENTS_OK
  */
-int op_check_events(int ctr, u32 event, u32 um, op_cpu cpu_type);
+int op_check_events(char * name, int ctr, u32 event, u32 um, op_cpu cpu_type);
 
 /**
  * free memory used by any call to above function. Need to be called only once
diff --git a/libpe_utils/op_pe_utils.cpp b/libpe_utils/op_pe_utils.cpp
index 8c69894..c5b6ee7 100644
--- a/libpe_utils/op_pe_utils.cpp
+++ b/libpe_utils/op_pe_utils.cpp
@@ -379,7 +379,7 @@ out:
 static void _get_event_code(operf_event_t * event, op_cpu cpu_type)
 {
 	FILE * fp;
-	char oprof_event_code[9];
+	char oprof_event_code[11];
 	string command;
 	u64 base_code, config;
 	char buf[20];
@@ -412,7 +412,6 @@ static void _get_event_code(operf_event_t * event, op_cpu cpu_type)
 
 
 #if defined(__i386__) || defined(__x86_64__)
-	char mask[OP_MAX_UM_NAME_LEN];
 	// Setup EventSelct[11:8] field for AMD
 	const char * vendor_AMD = "AuthenticAMD";
 	if (op_is_cpu_vendor((char *)vendor_AMD)) {
@@ -422,8 +421,10 @@ static void _get_event_code(operf_event_t * event, op_cpu cpu_type)
 
 	// Setup EventSelct[7:0] field
 	config |= base_code & 0xFFULL;
-
-	// Setup unitmask field
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc64__)
+	char mask[OP_MAX_UM_NAME_LEN];
+// Setup unitmask field
 handle_named_um:
 	if (event->um_name[0]) {
 		command = OP_BINDIR;
@@ -489,7 +490,12 @@ handle_named_um:
 			strncpy(event->um_name, mask, OP_MAX_UM_NAME_LEN - 1);
 			goto handle_named_um;
 		}
+#if defined(__powerpc64__)
+		config = base_code;
+		config |= ((event->evt_um & 0xFFULL) << 32);
+#else
 		config |= ((event->evt_um & 0xFFULL) << 8);
+#endif
 	} else {
 		config |= ((event->evt_um & 0xFFULL) << 8);
 	}
@@ -505,6 +511,7 @@ handle_named_um:
 		}
 	}
 	event->evt_code = config;
+	cverb << vdebug << "Final event code is " << hex << event->evt_code << endl;
 }
 
 #if PPC64_ARCH
diff --git a/utils/ophelp.c b/utils/ophelp.c
index e38e417..a80fec8 100644
--- a/utils/ophelp.c
+++ b/utils/ophelp.c
@@ -180,7 +180,7 @@ static void check_event(struct parsed_event * pev,
 	if (pev->unit_mask_name)
 		ret = 0;
 	else
-		ret = op_check_events(0, event->val, pev->unit_mask, cpu_type);
+		ret = op_check_events(pev->name, 0, event->val, pev->unit_mask, cpu_type);
 
 	if (ret & OP_INVALID_UM) {
 		fprintf(stderr, "Invalid unit mask 0x%x for event %s\n",
openSUSE Build Service is sponsored by