File xsa206-0001-xenstored-apply-a-write-transaction-rate-limit.patch of Package xen.4507
References: bsc#1030144 XSA-206
From 45ef3d6a8a09e3827712a4485024ecc8e6c2f18a Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 15 Mar 2017 13:10:11 +0100
Subject: [PATCH 1/2] xenstored: apply a write transaction rate limit
This avoids a rogue client being about to stall another client (eg the
toolstack) indefinitely.
This is XSA-206.
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
tools/xenstore/xenstored_core.c | 9 +-
tools/xenstore/xenstored_core.h | 6 +
tools/xenstore/xenstored_domain.c | 215 +++++++++++++++++++++++++++++++++
tools/xenstore/xenstored_domain.h | 25 ++++
tools/xenstore/xenstored_transaction.c | 5 +
5 files changed, 259 insertions(+), 1 deletion(-)
Index: xen-4.4.4-testing/tools/xenstore/xenstored_core.c
===================================================================
--- xen-4.4.4-testing.orig/tools/xenstore/xenstored_core.c
+++ xen-4.4.4-testing/tools/xenstore/xenstored_core.c
@@ -326,6 +326,7 @@ static void initialize_fds(int sock, int
int *ptimeout)
{
struct connection *conn;
+ struct wrl_timestampt now;
if (fds)
memset(fds, 0, sizeof(struct pollfd) * current_array_size);
@@ -345,8 +346,11 @@ static void initialize_fds(int sock, int
xce_pollfd_idx = set_fd(xc_evtchn_fd(xce_handle),
POLLIN|POLLPRI);
+ wrl_gettime_now(&now);
+
list_for_each_entry(conn, &connections, list) {
if (conn->domain) {
+ wrl_check_timeout(conn->domain, now, ptimeout);
if (domain_can_read(conn) ||
(domain_can_write(conn) &&
!list_empty(&conn->out_list)))
@@ -440,6 +444,7 @@ static bool write_node(struct connection
goto error;
add_change_node(conn, node, false);
+ wrl_apply_debit_direct(conn);
data.dptr = talloc_size(node, data.dsize);
hdr = (void *)data.dptr;
@@ -897,8 +902,10 @@ static void delete_node_single(struct co
return;
}
- if (changed)
+ if (changed) {
add_change_node(conn, node, true);
+ wrl_apply_debit_direct(conn);
+ }
domain_entry_dec(conn, node);
}
Index: xen-4.4.4-testing/tools/xenstore/xenstored_core.h
===================================================================
--- xen-4.4.4-testing.orig/tools/xenstore/xenstored_core.h
+++ xen-4.4.4-testing/tools/xenstore/xenstored_core.h
@@ -34,6 +34,12 @@
/* DEFAULT_BUFFER_SIZE should be large enough for each errno string. */
#define DEFAULT_BUFFER_SIZE 16
+#define MIN(a, b) (((a) < (b))? (a) : (b))
+
+typedef int32_t wrl_creditt;
+#define WRL_CREDIT_MAX (1000*1000*1000)
+/* ^ satisfies non-overflow condition for wrl_xfer_credit */
+
struct buffered_data
{
struct list_head list;
Index: xen-4.4.4-testing/tools/xenstore/xenstored_domain.c
===================================================================
--- xen-4.4.4-testing.orig/tools/xenstore/xenstored_domain.c
+++ xen-4.4.4-testing/tools/xenstore/xenstored_domain.c
@@ -22,6 +22,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <time.h>
#include <xenctrl.h>
#include "utils.h"
@@ -75,6 +76,10 @@ struct domain
/* number of watch for this domain */
int nbwatch;
+
+ /* write rate limit */
+ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
+ struct wrl_timestampt wrl_timestamp;
};
static LIST_HEAD(domains);
@@ -207,6 +212,8 @@ static int destroy_domain(void *_domain)
fire_watches(NULL, domain, "@releaseDomain", false);
+ wrl_domain_destroy(domain);
+
return 0;
}
@@ -254,6 +261,9 @@ void handle_event(void)
bool domain_can_read(struct connection *conn)
{
struct xenstore_domain_interface *intf = conn->domain->interface;
+
+ if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
+ return false;
return (intf->req_cons != intf->req_prod);
}
@@ -290,6 +300,8 @@ static struct domain *new_domain(void *c
if (!domain->path)
return NULL;
+ wrl_domain_new(domain);
+
list_add(&domain->list, &domains);
talloc_set_destructor(domain, destroy_domain);
@@ -724,6 +736,209 @@ int domain_watch(struct connection *conn
: 0;
}
+static wrl_creditt wrl_config_writecost = WRL_FACTOR;
+static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR;
+static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR;
+static wrl_creditt wrl_config_gburst = WRL_GBURST * WRL_FACTOR;
+static wrl_creditt wrl_config_newdoms_dburst =
+ WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR;
+
+long wrl_ntransactions;
+
+static long wrl_ndomains;
+static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */
+
+void wrl_gettime_now(struct wrl_timestampt *now_wt)
+{
+ struct timespec now_ts;
+ int r;
+
+ r = clock_gettime(CLOCK_MONOTONIC, &now_ts);
+ if (r)
+ barf_perror("Could not find time (clock_gettime failed)");
+
+ now_wt->sec = now_ts.tv_sec;
+ now_wt->msec = now_ts.tv_nsec / 1000000;
+}
+
+static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor,
+ wrl_creditt *credit, wrl_creditt credit_ceil)
+ /*
+ * Transfers zero or more credit from "debit" to "credit".
+ * Transfers as much as possible while maintaining
+ * debit >= debit_floor and credit <= credit_ceil.
+ * (If that's violated already, does nothing.)
+ *
+ * Sufficient conditions to avoid overflow, either of:
+ * |every argument| <= 0x3fffffff
+ * |every argument| <= 1E9
+ * |every argument| <= WRL_CREDIT_MAX
+ * (And this condition is preserved.)
+ */
+{
+ wrl_creditt xfer = MIN( *debit - debit_floor,
+ credit_ceil - *credit );
+ if (xfer > 0) {
+ *debit -= xfer;
+ *credit += xfer;
+ }
+}
+
+void wrl_domain_new(struct domain *domain)
+{
+ domain->wrl_credit = 0;
+ wrl_gettime_now(&domain->wrl_timestamp);
+ wrl_ndomains++;
+ /* Steal up to DBURST from the reserve */
+ wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst,
+ &domain->wrl_credit, wrl_config_dburst);
+}
+
+void wrl_domain_destroy(struct domain *domain)
+{
+ wrl_ndomains--;
+ /*
+ * Don't bother recalculating domain's credit - this just
+ * means we don't give the reserve the ending domain's credit
+ * for time elapsed since last update.
+ */
+ wrl_xfer_credit(&domain->wrl_credit, 0,
+ &wrl_reserve, wrl_config_dburst);
+}
+
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now)
+{
+ /*
+ * We want to calculate
+ * credit += (now - timestamp) * RATE / ndoms;
+ * But we want it to saturate, and to avoid floating point.
+ * To avoid rounding errors from constantly adding small
+ * amounts of credit, we only add credit for whole milliseconds.
+ */
+ long seconds = now.sec - domain->wrl_timestamp.sec;
+ long milliseconds = now.msec - domain->wrl_timestamp.msec;
+ long msec;
+ int64_t denom, num;
+ wrl_creditt surplus;
+
+ seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */
+ msec = seconds * 1000 + milliseconds;
+
+ if (msec < 0)
+ /* shouldn't happen with CLOCK_MONOTONIC */
+ msec = 0;
+
+ /* 32x32 -> 64 cannot overflow */
+ denom = (int64_t)msec * wrl_config_rate;
+ num = (int64_t)wrl_ndomains * 1000;
+ /* denom / num <= 1E6 * wrl_config_rate, so with
+ reasonable wrl_config_rate, denom / num << 2^64 */
+
+ /* at last! */
+ domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num,
+ WRL_CREDIT_MAX );
+ /* (maybe briefly violating the DBURST cap on wrl_credit) */
+
+ /* maybe take from the reserve to make us nonnegative */
+ wrl_xfer_credit(&wrl_reserve, 0,
+ &domain->wrl_credit, 0);
+
+ /* return any surplus (over DBURST) to the reserve */
+ surplus = 0;
+ wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst,
+ &surplus, WRL_CREDIT_MAX);
+ wrl_xfer_credit(&surplus, 0,
+ &wrl_reserve, wrl_config_gburst);
+ /* surplus is now implicitly discarded */
+
+ domain->wrl_timestamp = now;
+
+ trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve"
+ " %9ld discard\n",
+ domain->domid,
+ msec,
+ (long)domain->wrl_credit, (long)wrl_reserve,
+ (long)surplus);
+}
+
+void wrl_check_timeout(struct domain *domain,
+ struct wrl_timestampt now,
+ int *ptimeout)
+{
+ uint64_t num, denom;
+ int wakeup;
+
+ wrl_credit_update(domain, now);
+
+ if (domain->wrl_credit >= 0)
+ /* not blocked */
+ return;
+
+ if (!*ptimeout)
+ /* already decided on immediate wakeup,
+ so no need to calculate our timeout */
+ return;
+
+ /* calculate wakeup = now + -credit / (RATE / ndoms); */
+
+ /* credit cannot go more -ve than one transaction,
+ * so the first multiplication cannot overflow even 32-bit */
+ num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains;
+ denom = wrl_config_rate;
+
+ wakeup = MIN( num / denom /* uint64_t */, INT_MAX );
+ if (*ptimeout==-1 || wakeup < *ptimeout)
+ *ptimeout = wakeup;
+
+ trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n",
+ domain->domid,
+ (long)domain->wrl_credit, (long)wrl_reserve,
+ wakeup);
+}
+
+void wrl_apply_debit_actual(struct domain *domain)
+{
+ struct wrl_timestampt now;
+
+ if (!domain)
+ /* sockets escape the write rate limit */
+ return;
+
+ wrl_gettime_now(&now);
+ wrl_credit_update(domain, now);
+
+ domain->wrl_credit -= wrl_config_writecost;
+ trace("wrl: domain %u credit=%ld (reserve=%ld)\n",
+ domain->domid,
+ (long)domain->wrl_credit, (long)wrl_reserve);
+}
+
+void wrl_apply_debit_direct(struct connection *conn)
+{
+ if (!conn)
+ /* some writes are generated internally */
+ return;
+
+ if (conn->transaction)
+ /* these are accounted for when the transaction ends */
+ return;
+
+ if (!wrl_ntransactions)
+ /* we don't conflict with anyone */
+ return;
+
+ wrl_apply_debit_actual(conn->domain);
+}
+
+void wrl_apply_debit_trans_commit(struct connection *conn)
+{
+ if (wrl_ntransactions <= 1)
+ /* our own transaction appears in the counter */
+ return;
+
+ wrl_apply_debit_actual(conn->domain);
+}
+
/*
* Local variables:
* c-file-style: "linux"
Index: xen-4.4.4-testing/tools/xenstore/xenstored_domain.h
===================================================================
--- xen-4.4.4-testing.orig/tools/xenstore/xenstored_domain.h
+++ xen-4.4.4-testing/tools/xenstore/xenstored_domain.h
@@ -66,4 +66,29 @@ void domain_watch_inc(struct connection
void domain_watch_dec(struct connection *conn);
int domain_watch(struct connection *conn);
+/* Write rate limiting */
+
+#define WRL_FACTOR 1000 /* for fixed-point arithmetic */
+#define WRL_RATE 200
+#define WRL_DBURST 10
+#define WRL_GBURST 1000
+#define WRL_NEWDOMS 5
+
+struct wrl_timestampt {
+ time_t sec;
+ int msec;
+};
+
+extern long wrl_ntransactions;
+
+void wrl_gettime_now(struct wrl_timestampt *now_ts);
+void wrl_domain_new(struct domain *domain);
+void wrl_domain_destroy(struct domain *domain);
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now);
+void wrl_check_timeout(struct domain *domain,
+ struct wrl_timestampt now,
+ int *ptimeout);
+void wrl_apply_debit_direct(struct connection *conn);
+void wrl_apply_debit_trans_commit(struct connection *conn);
+
#endif /* _XENSTORED_DOMAIN_H */
Index: xen-4.4.4-testing/tools/xenstore/xenstored_transaction.c
===================================================================
--- xen-4.4.4-testing.orig/tools/xenstore/xenstored_transaction.c
+++ xen-4.4.4-testing/tools/xenstore/xenstored_transaction.c
@@ -140,6 +140,7 @@ static int destroy_transaction(void *_tr
{
struct transaction *trans = _transaction;
+ wrl_ntransactions--;
trace_destroy(trans, "transaction");
if (trans->tdb)
tdb_close(trans->tdb);
@@ -202,6 +203,7 @@ int do_transaction_start(struct connecti
talloc_steal(conn, trans);
talloc_set_destructor(trans, destroy_transaction);
conn->transaction_started++;
+ wrl_ntransactions++;
snprintf(id_str, sizeof(id_str), "%u", trans->id);
send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1);
@@ -233,6 +235,9 @@ int do_transaction_end(struct connection
/* FIXME: Merge, rather failing on any change. */
if (trans->generation != generation)
return EAGAIN;
+
+ wrl_apply_debit_trans_commit(conn);
+
if (!replace_tdb(trans->tdb_name, trans->tdb))
return errno;
/* Don't close this: we won! */