File ppc64_cpu-info-fix-bad-report-when-non-continuous-CP.patch of Package powerpc-utils.32342

From f1a8ed892e18b83cb0483e8f8f8cbc512fa8510c Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Thu, 10 Aug 2023 11:47:07 +0200
Subject: [PATCH] ppc64_cpu/info: fix bad report when non continuous CPU ids

When CPU ids are not continuous, for instance because the kernel didn't
reuse a set of CPU ids already used on a different node, the output of
ppc64_cpu --info is not correct.

For instance, in the example below the CPU ids 48 to 55 haven't been reused
by the kernel when a CPU has been added after an LPM operation.
Note that the system is running in SMT=4.

The numactl -H command is providing the correct set of CPU:
ltczep3-lp4:~ # numactl -H
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 64 65 66 67 68 69 70 71
node 0 size: 7177 MB
node 0 free: 4235 MB
node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
node 1 size: 24508 MB
node 1 free: 23539 MB
node distances:
node   0   1
  0:  10  40
  1:  40  10

But ppc64_cpu --info reports the CPUs 48 to 55 as offline and does not
report the CPUs 64 to 71 at all:
ltczep3-lp4:~ # ppc64_cpu --info
Core   0:    0*    1*    2*    3*    4*    5*    6*    7*
Core   1:    8*    9*   10*   11*   12*   13*   14*   15*
Core   2:   16*   17*   18*   19*   20*   21*   22*   23*
Core   3:   24*   25*   26*   27*   28*   29*   30*   31*
Core   4:   32*   33*   34*   35*   36*   37*   38*   39*
Core   5:   40*   41*   42*   43*   44*   45*   46*   47*
Core   6:   48    49    50    51    52    53    54    55

This is because it assumes that the CPU ids are continuous, which is not
the case here.

To prevent that, when looking for a core, it now first checks that the
physical_id of the first thread in that core is defined (not -1). If that
is the case, this means that the CPU/core is present.

With that patch applied, ppc64_cpu --info is reporting:
ltczep3-lp4:~ # ppc64_cpu --info
Core   0:    0*    1*    2*    3*    4     5     6     7
Core   1:    8*    9*   10*   11*   12    13    14    15
Core   2:   16*   17*   18*   19*   20    21    22    23
Core   3:   24*   25*   26*   27*   28    29    30    31
Core   4:   32*   33*   34*   35*   36    37    38    39
Core   5:   40*   41*   42*   43*   44    45    46    47
Core   6:   64*   65*   66*   67*   68    69    70    71

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 src/common/cpu_info_helpers.c | 14 ++++++++++++++
 src/common/cpu_info_helpers.h |  1 +
 src/ppc64_cpu.c               | 25 +++++++++++++++++--------
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
index c05d96d..8c57db8 100644
--- a/src/common/cpu_info_helpers.c
+++ b/src/common/cpu_info_helpers.c
@@ -83,6 +83,20 @@ int __sysattr_is_writeable(char *attribute, int threads_in_system)
 	return test_sysattr(attribute, W_OK, threads_in_system);
 }
 
+int cpu_physical_id(int thread)
+{
+	char path[SYSFS_PATH_MAX];
+	int rc, physical_id;
+
+	sprintf(path, SYSFS_CPUDIR"/physical_id", thread);
+	rc = get_attribute(path, "%d", &physical_id);
+
+	/* This attribute does not exist in kernels without hotplug enabled */
+	if (rc && errno == ENOENT)
+		return -1;
+	return physical_id;
+}
+
 int cpu_online(int thread)
 {
 	char path[SYSFS_PATH_MAX];
diff --git a/src/common/cpu_info_helpers.h b/src/common/cpu_info_helpers.h
index 8f09d79..c063fff 100644
--- a/src/common/cpu_info_helpers.h
+++ b/src/common/cpu_info_helpers.h
@@ -32,6 +32,7 @@
 
 extern int __sysattr_is_readable(char *attribute, int threads_in_system);
 extern int __sysattr_is_writeable(char *attribute, int threads_in_system);
+extern int cpu_physical_id(int thread);
 extern int cpu_online(int thread);
 extern int is_subcore_capable(void);
 extern int num_subcores(void);
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
index 5fdf86a..ad9f4dc 100644
--- a/src/ppc64_cpu.c
+++ b/src/ppc64_cpu.c
@@ -1251,31 +1251,40 @@ static int do_cores_on(char *state)
 	return 0;
 }
 
+static bool core_is_online(int core)
+{
+	return  cpu_physical_id(core * threads_per_cpu) != -1;
+}
+
 static int do_info(void)
 {
 	int i, j, thread_num;
 	char online;
-	int subcores = 0;
+	int core, subcores = 0;
 
 	if (is_subcore_capable())
 		subcores = num_subcores();
 
-	for (i = 0; i < cpus_in_system; i++) {
+	for (i = 0, core = 0; core < cpus_in_system; i++) {
+
+		if (!core_is_online(i))
+			continue;
 
 		if (subcores > 1) {
-			if (i % subcores == 0)
-				printf("Core %3d:\n", i/subcores);
-			printf("  Subcore %3d: ", i);
+			if (core % subcores == 0)
+				printf("Core %3d:\n", core/subcores);
+			printf("  Subcore %3d: ", core);
 		} else {
-			printf("Core %3d: ", i);
+			printf("Core %3d: ", core);
 		}
 
-		for (j = 0; j < threads_per_cpu; j++) {
-			thread_num = i*threads_per_cpu + j;
+		thread_num = i * threads_per_cpu;
+		for (j = 0; j < threads_per_cpu; j++, thread_num++) {
 			online = cpu_online(thread_num) ? '*' : ' ';
 			printf("%4d%c ", thread_num, online);
 		}
 		printf("\n");
+		core++;
 	}
 	return 0;
 }
-- 
2.43.0

openSUSE Build Service is sponsored by