File xsa326-05.patch of Package xen.27273
From 6102ec4d215359dbc9433bc2bfbbf63e24f91761 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 13 Sep 2022 07:35:08 +0200
Subject: tools/xenstore: limit outstanding requests
Add another quota for limiting the number of outstanding requests of a
guest. As the way to specify quotas on the command line is becoming
rather nasty, switch to a new scheme using [--quota|-Q] <what>=<val>
allowing to add more quotas in future easily.
Set the default value to 20 (basically a random value not seeming to
be too high or too low).
A request is said to be outstanding if any message generated by this
request (the direct response plus potential watch events) is not yet
completely stored into a ring buffer. The initial watch event sent as
a result of registering a watch is an exception.
Note that across a live update the relation to buffered watch events
for other domains is lost.
Use talloc_zero() for allocating the domain structure in order to have
all per-domain quota zeroed initially.
This is part of XSA-326 / CVE-2022-42312.
Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Julien Grall <jgrall@amazon.com>
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 98837ef2e9cf..2ed91d13297b 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -105,6 +105,7 @@ int quota_nb_watch_per_domain = 128;
int quota_max_entry_size = 2048; /* 2K */
int quota_max_transaction = 10;
int quota_nb_perms_per_node = 5;
+int quota_req_outstanding = 20;
unsigned int timeout_watch_event_msec = 20000;
@@ -220,12 +221,24 @@ static uint64_t get_now_msec(void)
return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
}
+/*
+ * Remove a struct buffered_data from the list of outgoing data.
+ * A struct buffered_data related to a request having caused watch events to be
+ * sent is kept until all those events have been written out.
+ * Each watch event is referencing the related request via pend.req, while the
+ * number of watch events caused by a request is kept in pend.ref.event_cnt
+ * (those two cases are mutually exclusive, so the two fields can share memory
+ * via a union).
+ * The struct buffered_data is freed only if no related watch event is
+ * referencing it. The related return data can be freed right away.
+ */
static void free_buffered_data(struct buffered_data *out,
struct connection *conn)
{
struct buffered_data *req;
list_del(&out->list);
+ out->on_out_list = false;
/*
* Update conn->timeout_msec with the next found timeout value in the
@@ -241,6 +254,30 @@ static void free_buffered_data(struct buffered_data *out,
}
}
+ if (out->hdr.msg.type == XS_WATCH_EVENT) {
+ req = out->pend.req;
+ if (req) {
+ req->pend.ref.event_cnt--;
+ if (!req->pend.ref.event_cnt && !req->on_out_list) {
+ if (req->on_ref_list) {
+ domain_outstanding_domid_dec(
+ req->pend.ref.domid);
+ list_del(&req->list);
+ }
+ talloc_free(req);
+ }
+ }
+ } else if (out->pend.ref.event_cnt) {
+ /* Hang out off from conn. */
+ talloc_steal(NULL, out);
+ if (out->buffer != out->default_buffer)
+ talloc_free(out->buffer);
+ list_add(&out->list, &conn->ref_list);
+ out->on_ref_list = true;
+ return;
+ } else
+ domain_outstanding_dec(conn);
+
talloc_free(out);
}
@@ -349,6 +386,7 @@ static bool write_messages(struct connection *conn)
static int destroy_conn(void *_conn)
{
struct connection *conn = _conn;
+ struct buffered_data *req;
/* Flush outgoing if possible, but don't block. */
if (!conn->domain) {
@@ -362,6 +400,11 @@ static int destroy_conn(void *_conn)
break;
close(conn->fd);
}
+
+ conn_free_buffered_data(conn);
+ list_for_each_entry(req, &conn->ref_list, list)
+ req->on_ref_list = false;
+
if (conn->target)
talloc_unlink(conn, conn->target);
list_del(&conn->list);
@@ -800,6 +843,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
/* Queue for later transmission. */
list_add_tail(&bdata->list, &conn->out_list);
+ bdata->on_out_list = true;
+ domain_outstanding_inc(conn);
}
/*
@@ -807,7 +852,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
* As this is not directly related to the current command, errors can't be
* reported.
*/
-void send_event(struct connection *conn, const char *path, const char *token)
+void send_event(struct buffered_data *req, struct connection *conn,
+ const char *path, const char *token)
{
struct buffered_data *bdata;
unsigned int len;
@@ -837,8 +883,13 @@ void send_event(struct connection *conn, const char *path, const char *token)
conn->timeout_msec = bdata->timeout_msec;
}
+ bdata->pend.req = req;
+ if (req)
+ req->pend.ref.event_cnt++;
+
/* Queue for later transmission. */
list_add_tail(&bdata->list, &conn->out_list);
+ bdata->on_out_list = true;
}
/* Some routines (write, mkdir, etc) just need a non-error return */
@@ -1574,6 +1625,7 @@ static void handle_input(struct connection *conn)
return;
}
in = conn->in;
+ in->pend.ref.domid = conn->id;
/* Not finished header yet? */
if (in->inhdr) {
@@ -1644,6 +1696,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
new->is_ignored = false;
new->transaction_started = 0;
INIT_LIST_HEAD(&new->out_list);
+ INIT_LIST_HEAD(&new->ref_list);
INIT_LIST_HEAD(&new->watches);
INIT_LIST_HEAD(&new->transaction_list);
@@ -2079,6 +2132,9 @@ static void usage(void)
" -W, --watch-nb <nb> limit the number of watches per domain,\n"
" -t, --transaction <nb> limit the number of transaction allowed per domain,\n"
" -A, --perm-nb <nb> limit the number of permissions per node,\n"
+" -Q, --quota <what>=<nb> set the quota <what> to the value <nb>, allowed\n"
+" quotas are:\n"
+" outstanding: number of outstanding requests\n"
" -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
" allowed timeout candidates are:\n"
" watch-event: time a watch-event is kept pending\n"
@@ -2103,6 +2159,7 @@ static struct option options[] = {
{ "trace-file", 1, NULL, 'T' },
{ "transaction", 1, NULL, 't' },
{ "perm-nb", 1, NULL, 'A' },
+ { "quota", 1, NULL, 'Q' },
{ "timeout", 1, NULL, 'w' },
{ "no-recovery", 0, NULL, 'R' },
{ "internal-db", 0, NULL, 'I' },
@@ -2148,6 +2205,20 @@ static void set_timeout(const char *arg)
barf("unknown timeout \"%s\"\n", arg);
}
+static void set_quota(const char *arg)
+{
+ const char *eq = strchr(arg, '=');
+ int val;
+
+ if (!eq)
+ barf("quotas must be specified via <what>=<nb>\n");
+ val = get_optval_int(eq + 1);
+ if (what_matches(arg, "outstanding"))
+ quota_req_outstanding = val;
+ else
+ barf("unknown quota \"%s\"\n", arg);
+}
+
int main(int argc, char *argv[])
{
int opt;
@@ -2159,7 +2230,7 @@ int main(int argc, char *argv[])
int timeout;
- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:T:RVW:w:", options,
+ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:Q:T:RVW:w:", options,
NULL)) != -1) {
switch (opt) {
case 'D':
@@ -2204,6 +2275,9 @@ int main(int argc, char *argv[])
case 'A':
quota_nb_perms_per_node = strtol(optarg, NULL, 10);
break;
+ case 'Q':
+ set_quota(optarg);
+ break;
case 'w':
set_timeout(optarg);
break;
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 3112c11811e5..edeaa96dd10b 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -45,6 +45,8 @@ typedef int32_t wrl_creditt;
struct buffered_data
{
struct list_head list;
+ bool on_out_list;
+ bool on_ref_list;
/* Are we still doing the header? */
bool inhdr;
@@ -52,6 +54,17 @@ struct buffered_data
/* How far are we? */
unsigned int used;
+ /* Outstanding request accounting. */
+ union {
+ /* ref is being used for requests. */
+ struct {
+ unsigned int event_cnt; /* # of outstanding events. */
+ unsigned int domid; /* domid of request. */
+ } ref;
+ /* req is being used for watch events. */
+ struct buffered_data *req; /* request causing event. */
+ } pend;
+
union {
struct xsd_sockmsg msg;
char raw[sizeof(struct xsd_sockmsg)];
@@ -93,6 +106,9 @@ struct connection
struct list_head out_list;
uint64_t timeout_msec;
+ /* Referenced requests no longer pending. */
+ struct list_head ref_list;
+
/* Transaction context for current request (NULL if none). */
struct transaction *transaction;
@@ -154,7 +170,8 @@ unsigned int get_strings(struct buffered_data *data,
void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
const void *data, unsigned int len);
-void send_event(struct connection *conn, const char *path, const char *token);
+void send_event(struct buffered_data *req, struct connection *conn,
+ const char *path, const char *token);
/* Some routines (write, mkdir, etc) just need a non-error return */
void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
@@ -202,6 +219,7 @@ extern int dom0_domid;
extern int dom0_event;
extern int priv_domid;
extern int quota_nb_entry_per_domain;
+extern int quota_req_outstanding;
extern unsigned int timeout_watch_event_msec;
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 3bff322d024d..2dd80eb1a7bb 100644
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -82,6 +82,9 @@ struct domain
/* number of watch for this domain */
int nbwatch;
+ /* Number of outstanding requests. */
+ int nboutstanding;
+
/* write rate limit */
wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
struct wrl_timestampt wrl_timestamp;
@@ -284,8 +287,12 @@ bool domain_can_read(struct connection *conn)
{
struct xenstore_domain_interface *intf = conn->domain->interface;
- if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
- return false;
+ if (domain_is_unprivileged(conn)) {
+ if (conn->domain->wrl_credit < 0)
+ return false;
+ if (conn->domain->nboutstanding >= quota_req_outstanding)
+ return false;
+ }
if (conn->is_ignored)
return false;
@@ -334,7 +341,7 @@ static struct domain *alloc_domain(void *context, unsigned int domid)
{
struct domain *domain;
- domain = talloc(context, struct domain);
+ domain = talloc_zero(context, struct domain);
if (!domain) {
errno = ENOMEM;
return NULL;
@@ -383,8 +390,6 @@ static int new_domain(struct domain *domain, int port)
domain->conn->id = domain->domid;
domain->remote_port = port;
- domain->nbentry = 0;
- domain->nbwatch = 0;
return 0;
}
@@ -922,6 +927,28 @@ int domain_watch(struct connection *conn)
: 0;
}
+void domain_outstanding_inc(struct connection *conn)
+{
+ if (!conn || !conn->domain)
+ return;
+ conn->domain->nboutstanding++;
+}
+
+void domain_outstanding_dec(struct connection *conn)
+{
+ if (!conn || !conn->domain)
+ return;
+ conn->domain->nboutstanding--;
+}
+
+void domain_outstanding_domid_dec(unsigned int domid)
+{
+ struct domain *d = find_domain_by_domid(domid);
+
+ if (d)
+ d->nboutstanding--;
+}
+
static wrl_creditt wrl_config_writecost = WRL_FACTOR;
static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR;
static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR;
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
index 5e00087206c7..4bff2e655b9b 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -67,6 +67,9 @@ int domain_entry(struct connection *conn);
void domain_watch_inc(struct connection *conn);
void domain_watch_dec(struct connection *conn);
int domain_watch(struct connection *conn);
+void domain_outstanding_inc(struct connection *conn);
+void domain_outstanding_dec(struct connection *conn);
+void domain_outstanding_domid_dec(unsigned int domid);
/* Special node permission handling. */
int set_perms_special(struct connection *conn, const char *name,
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index 2f9367767e44..c50c0575f0f1 100644
--- a/tools/xenstore/xenstored_watch.c
+++ b/tools/xenstore/xenstored_watch.c
@@ -142,6 +142,7 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
struct node *node, bool exact, struct node_perms *perms)
{
struct connection *i;
+ struct buffered_data *req;
struct watch *watch;
/* During transactions, don't fire watches, but queue them. */
@@ -150,6 +151,8 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
return;
}
+ req = domain_is_unprivileged(conn) ? conn->in : NULL;
+
/* Create an event for each watch. */
list_for_each_entry(i, &connections, list) {
/* introduce/release domain watches */
@@ -164,12 +167,12 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
list_for_each_entry(watch, &i->watches, list) {
if (exact) {
if (streq(name, watch->node))
- send_event(i,
+ send_event(req, i,
get_watch_path(watch, name),
watch->token);
} else {
if (is_child(name, watch->node))
- send_event(i,
+ send_event(req, i,
get_watch_path(watch, name),
watch->token);
}
@@ -238,8 +241,12 @@ int do_watch(struct connection *conn, struct buffered_data *in)
talloc_set_destructor(watch, destroy_watch);
send_ack(conn, XS_WATCH);
- /* We fire once up front: simplifies clients and restart. */
- send_event(conn, get_watch_path(watch, watch->node), watch->token);
+ /*
+ * We fire once up front: simplifies clients and restart.
+ * This event will not be linked to the XS_WATCH request.
+ */
+ send_event(NULL, conn, get_watch_path(watch, watch->node),
+ watch->token);
return 0;
}