File 0001-Revert-remove-affinity_hint-infrastructure.patch of Package irqbalance.19035
From a43f7cdb820d4d81184b03c065f09ca7f27e8fce Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Tue, 21 May 2019 16:18:55 +0200
Subject: Revert "remove affinity_hint infrastructure"
This reverts commit dcc411e7bfdd95bbdb7fd0af8699f8cafe686ff4.
---
 activate.c       | 28 ++++++++++++++++++++++++++--
 classify.c       | 35 ++++++++++++++++++++++++++++++++---
 irqbalance.1     | 19 +++++++++++++++++++
 irqbalance.c     | 15 ++++++++++++++-
 irqbalance.h     |  7 +++++++
 irqlist.c        |  7 +++++++
 placement.c      | 14 ++++++++++++++
 procinterrupts.c |  1 +
 8 files changed, 120 insertions(+), 6 deletions(-)
--- a/activate.c
+++ b/activate.c
@@ -69,9 +69,33 @@ static void activate_mapping(struct irq_
 	if (!info->moved)
 		return;
 
-	if (info->assigned_obj) {
+	if ((info->hint_policy == HINT_POLICY_EXACT) &&
+	    (!cpus_empty(info->affinity_hint))) {
+		if (cpus_intersects(info->affinity_hint, banned_cpus))
+			log(TO_ALL, LOG_WARNING,
+			    "irq %d affinity_hint and banned cpus confict\n",
+			    info->irq);
+		else {
+			applied_mask = info->affinity_hint;
+			valid_mask = 1;
+		}
+	} else if (info->assigned_obj) {
 		applied_mask = info->assigned_obj->mask;
-		valid_mask = 1;
+		if ((info->hint_policy == HINT_POLICY_SUBSET) &&
+		    (!cpus_empty(info->affinity_hint))) {
+			cpus_and(applied_mask, applied_mask, info->affinity_hint);
+			if (!cpus_intersects(applied_mask, unbanned_cpus)) {
+				if (!info->warned) {
+					info->warned = 1;
+					log(TO_ALL, LOG_WARNING,
+					    "irq %d affinity_hint subset empty\n",
+					    info->irq);
+				}
+			} else
+				valid_mask = 1;
+		} else {
+			valid_mask = 1;
+		}
 	}
 
 	/*
--- a/classify.c
+++ b/classify.c
@@ -31,6 +31,7 @@ struct user_irq_policy {
 	int level;
 	int numa_node_set;
 	int numa_node;
+	enum hp_e hintpolicy;
 };
 
 static GList *interrupts_db = NULL;
@@ -282,6 +283,7 @@ static void add_banned_irq(int irq, GLis
 
 	new->irq = irq;
 	new->flags |= IRQ_FLAG_BANNED;
+	new->hint_policy = HINT_POLICY_EXACT;
 
 	*list = g_list_append(*list, new);
 	log(TO_CONSOLE, LOG_INFO, "IRQ %d was BANNED.\n", irq);
@@ -345,6 +347,7 @@ void add_cl_banned_module(char *modname)
 static struct irq_info *add_one_irq_to_db(const char *devpath, int irq, struct user_irq_policy *pol)
 {
 	int irq_class = IRQ_OTHER;
+	int rc;
 	struct irq_info *new, find;
 	int numa_node;
 	char path[PATH_MAX];
@@ -375,6 +378,7 @@ static struct irq_info *add_one_irq_to_d
 
 	new->irq = irq;
 	new->class = IRQ_OTHER;
+	new->hint_policy = pol->hintpolicy; 
 
 	interrupts_db = g_list_append(interrupts_db, new);
 
@@ -398,7 +402,7 @@ get_numa_node:
 		sprintf(path, "%s/numa_node", devpath);
 		fd = fopen(path, "r");
 		if (fd) {
-			fscanf(fd, "%d", &numa_node);
+			rc = fscanf(fd, "%d", &numa_node);
 			fclose(fd);
 		}
 	}
@@ -412,7 +416,7 @@ get_numa_node:
 	fd = fopen(path, "r");
 	if (!fd) {
 		cpus_setall(new->cpumask);
-		goto out;
+		goto assign_affinity_hint;
 	}
 	lcpu_mask = NULL;
 	ret = getline(&lcpu_mask, &blen, fd);
@@ -424,6 +428,19 @@ get_numa_node:
 	}
 	free(lcpu_mask);
 
+assign_affinity_hint:
+	cpus_clear(new->affinity_hint);
+	sprintf(path, "/proc/irq/%d/affinity_hint", irq);
+	fd = fopen(path, "r");
+	if (!fd)
+		goto out;
+	lcpu_mask = NULL;
+	ret = getline(&lcpu_mask, &blen, fd);
+	fclose(fd);
+	if (ret <= 0)
+		goto out;
+	cpumask_parse_user(lcpu_mask, ret, new->affinity_hint);
+	free(lcpu_mask);
 out:
 	log(TO_CONSOLE, LOG_INFO, "Adding IRQ %d to database\n", irq);
 	return new;
@@ -482,6 +499,17 @@ static void parse_user_policy_key(char *
 		}
 		pol->numa_node = idx;
 		pol->numa_node_set = 1;
+	} else if (!strcasecmp("hintpolicy", key)) {
+		if (!strcasecmp("exact", value))
+			pol->hintpolicy = HINT_POLICY_EXACT;
+		else if (!strcasecmp("subset", value))
+			pol->hintpolicy = HINT_POLICY_SUBSET;
+		else if (!strcasecmp("ignore", value))
+			pol->hintpolicy = HINT_POLICY_IGNORE;
+		else {
+			key_set = 0;
+			log(TO_ALL, LOG_WARNING, "Unknown value for hitpolicy: %s\n", value);
+		}
 	} else {
 		key_set = 0;
 		log(TO_ALL, LOG_WARNING, "Unknown key returned, ignoring: %s\n", key);
@@ -506,6 +534,7 @@ static void get_irq_user_policy(char *pa
 	char *brc;
 
 	memset(pol, -1, sizeof(struct user_irq_policy));
+	pol->hintpolicy = global_hint_policy;
 
 	/* Return defaults if no script was given */
 	if (!polscript)
@@ -546,7 +575,7 @@ static int check_for_module_ban(char *na
 		return 0;
 }
 
-static int check_for_irq_ban(char *path __attribute__((unused)), int irq, GList *proc_interrupts)
+static int check_for_irq_ban(char *path, int irq, GList *proc_interrupts)
 {
 	struct irq_info find, *res;
 	GList *entry;
--- a/irqbalance.1
+++ b/irqbalance.1
@@ -50,6 +50,21 @@ Causes irqbalance to run in the foregrou
 Enables log output optimized for systemd-journal.
 
 .TP
+.B -h, --hintpolicy=[exact | subset | ignore]
+Set the policy for how IRQ kernel affinity hinting is treated.  Can be one of:
+.P
+.I exact
+IRQ affinity hint is applied unilaterally and never violated.
+.P
+.I subset
+IRQ is balanced, but the assigned object will be a subset of the affinity hint.
+.P
+.I ignore
+IRQ affinity hint value is completely ignored.
+.P
+The default value for hintpolicy is ignore.
+
+.TP
 .B -p, --powerthresh=<threshold>
 Set the threshold at which we attempt to move a CPU into powersave mode
 If more than <threshold> CPUs are more than 1 standard deviation below the
@@ -107,6 +122,10 @@ that irqbalance can bias IRQ affinity fo
 node.  Note that specifying a -1 here forces irqbalance to consider an interrupt
 from a device to be equidistant from all nodes.
 .TP
+.I hintpolicy=[exact | subset | ignore]
+This allows a user to override the globally set hintpolicy for a given irq.  Use
+is identical to the --hintpolicy setting, but applied per irq.
+.TP
 .B -s, --pid=<file>
 Have irqbalance write its process id to the specified file.  By default no
 pidfile is written.  The written pidfile is automatically unlinked when
--- a/irqbalance.c
+++ b/irqbalance.c
@@ -53,6 +53,7 @@ int journal_logging = 0;
 int need_rescan;
 unsigned int log_mask = TO_ALL;
 const char *log_indent;
+enum hp_e global_hint_policy = HINT_POLICY_IGNORE;
 unsigned long power_thresh = ULONG_MAX;
 unsigned long deepest_cache = 2;
 unsigned long long cycle_count = 0;
@@ -117,7 +118,7 @@ static void parse_command_line(int argc,
 	unsigned long val;
 
 	while ((opt = getopt_long(argc, argv,
-		"odfji:p:s:c:b:l:m:t:V",
+		"odfjh:i:p:s:c:b:l:m:t:V",
 		lopts, &longind)) != -1) {
 
 		switch(opt) {
@@ -155,6 +156,18 @@ static void parse_command_line(int argc,
 			case 'f':
 				foreground_mode=1;
 				break;
+			case 'h':
+				if (!strncmp(optarg, "exact", strlen(optarg)))
+					global_hint_policy = HINT_POLICY_EXACT;
+				else if (!strncmp(optarg, "subset", strlen(optarg)))
+					global_hint_policy = HINT_POLICY_SUBSET;
+				else if (!strncmp(optarg, "ignore", strlen(optarg)))
+					global_hint_policy = HINT_POLICY_IGNORE;
+				else {
+					usage();
+					exit(1);
+				}
+				break;
 			case 'i':
 				val = strtoull(optarg, NULL, 10);
 				if (val == ULONG_MAX) {
--- a/irqbalance.h
+++ b/irqbalance.h
@@ -65,10 +65,17 @@ extern GList *cpus;
 extern int numa_avail;
 extern GList *cl_banned_irqs;
 
+enum hp_e {	/* how an irq's kernel-published affinity_hint is treated */
+	HINT_POLICY_IGNORE,	/* hint is completely ignored; initial value of global_hint_policy */
+	HINT_POLICY_SUBSET,	/* irq is balanced, but only onto objects whose mask intersects the hint */
+	HINT_POLICY_EXACT	/* a non-empty hint is applied as the mask and the irq is not rebalanced */
+};
+
 extern int debug_mode;
 extern int journal_logging;
 extern int one_shot_mode;
 extern int need_rescan;
+extern enum hp_e global_hint_policy;
 extern unsigned long long cycle_count;
 extern unsigned long power_thresh;
 extern unsigned long deepest_cache;
--- a/irqlist.c
+++ b/irqlist.c
@@ -77,6 +77,13 @@ static void move_candidate_irqs(struct i
 {
 	struct load_balance_info *lb_info = data;
 
+	/* never move an irq that has an affinity hint when
+ 	 * hint_policy is HINT_POLICY_EXACT 
+ 	 */
+	if (info->hint_policy == HINT_POLICY_EXACT)
+		if (!cpus_empty(info->affinity_hint))
+			return;
+
 	/* Don't rebalance irqs that don't want it */
 	if (info->level == BALANCE_NONE)
 		return;
--- a/placement.c
+++ b/placement.c
@@ -41,6 +41,7 @@ static void find_best_object(struct topo
 {
 	struct obj_placement *best = (struct obj_placement *)data;
 	uint64_t newload;
+	cpumask_t subset;
 
 	/*
  	 * Don't consider the unspecified numa node here
@@ -56,6 +57,19 @@ static void find_best_object(struct topo
 	    (!cpus_intersects(d->mask, unbanned_cpus)))
 		return;
 
+	/*
+ 	 * If the hint policy is subset, then we only want 
+ 	 * to consider objects that are within the irq's hint, but
+ 	 * only if that irq in fact has published a hint
+ 	 */
+	if (best->info->hint_policy == HINT_POLICY_SUBSET) {
+		if (!cpus_empty(best->info->affinity_hint)) {
+			cpus_and(subset, best->info->affinity_hint, d->mask);
+			if (cpus_empty(subset))
+				return;
+		}
+	}
+
 	if (d->powersave_mode)
 		return;
 
--- a/procinterrupts.c
+++ b/procinterrupts.c
@@ -221,6 +221,7 @@ GList* collect_full_irq_list()
 				info->class = IRQ_OTHER;
 #endif
 			}
+			info->hint_policy = global_hint_policy;
 			info->name = strdupa(irq_mod);
 			tmp_list = g_list_append(tmp_list, info);
 		}