File Prevent_credential_abuse.patch of Package slurm.31097
From 39fadce8c5f2b0f0fb5de6f50a55ed823df66f68 Mon Sep 17 00:00:00 2001
From: Dominik Bartkiewicz <bart@schedmd.com>
Date: Wed, 4 May 2022 13:06:36 -0600
Subject: [PATCH 01/30] Prevent credential abuse.
CVE-2022-29500
---
NEWS | 1 +
src/api/config_info.c | 1 +
src/api/job_info.c | 2 +
src/api/job_step_info.c | 2 +
src/api/node_info.c | 1 +
src/api/pmi_server.c | 1 +
src/api/reconfigure.c | 1 +
src/api/signal.c | 3 +
src/api/slurm_pmi.c | 3 +
src/api/step_launch.c | 2 +
src/bcast/file_bcast.c | 1 +
src/common/forward.c | 5 +-
src/common/slurm_auth.c | 34 ++-
src/common/slurm_auth.h | 16 +-
src/common/slurm_persist_conn.c | 1 +
src/common/slurm_persist_conn.h | 1 +
src/common/slurm_protocol_api.c | 242 +++++++++++++++++-
src/common/slurm_protocol_api.h | 3 +
src/common/slurm_protocol_defs.c | 4 +
src/common/slurm_protocol_defs.h | 14 +
src/common/slurmdb_defs.c | 1 +
src/common/stepd_api.c | 3 +-
src/common/stepd_api.h | 3 +-
.../accounting_storage/common/common_as.c | 1 +
.../slurmdbd/slurmdbd_agent.c | 1 +
src/plugins/auth/munge/auth_munge.c | 64 +++--
src/plugins/auth/none/auth_none.c | 17 +-
src/plugins/mpi/pmi2/setup.c | 2 +
src/plugins/mpi/pmi2/setup.h | 1 +
src/plugins/mpi/pmi2/spawn.c | 3 +-
src/plugins/mpi/pmix/pmixp_dconn.c | 1 +
src/plugins/mpi/pmix/pmixp_dconn.h | 3 +
src/plugins/mpi/pmix/pmixp_server.c | 27 +-
src/plugins/mpi/pmix/pmixp_utils.c | 1 +
src/sattach/sattach.c | 1 +
src/slurmctld/agent.c | 19 ++
src/slurmctld/agent.h | 5 +
src/slurmctld/backup.c | 11 +-
src/slurmctld/controller.c | 2 +
src/slurmctld/fed_mgr.c | 2 +
src/slurmctld/job_mgr.c | 13 +-
src/slurmctld/job_scheduler.c | 3 +
src/slurmctld/node_mgr.c | 2 +-
src/slurmctld/node_scheduler.c | 3 +
src/slurmctld/ping_nodes.c | 4 +
src/slurmctld/srun_comm.c | 37 ++-
src/slurmctld/step_mgr.c | 3 +
src/slurmd/slurmd/req.c | 21 +-
src/slurmd/slurmd/slurmd.c | 6 +-
src/slurmd/slurmstepd/io.c | 1 +
src/slurmd/slurmstepd/mgr.c | 16 +-
src/slurmd/slurmstepd/mgr.h | 2 +-
src/slurmd/slurmstepd/req.c | 1 +
src/slurmd/slurmstepd/slurmstepd.c | 23 +-
src/slurmd/slurmstepd/slurmstepd_job.c | 7 +-
src/slurmd/slurmstepd/slurmstepd_job.h | 4 +-
src/slurmd/slurmstepd/x11_forwarding.c | 4 +
src/slurmdbd/read_config.c | 1 +
src/slurmdbd/slurmdbd.c | 1 +
59 files changed, 576 insertions(+), 82 deletions(-)
diff --git a/NEWS b/NEWS
index 5e01307e9d..abe45e4aee 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
This file describes changes in recent versions of Slurm. It primarily
documents those changes that are of interest to users and administrators.
+ -- CVE-2022-29500 - Prevent credential abuse.
-- CVE-2022-29501 - Prevent abuse of REQUEST_FORWARD_DATA.
* Changes in Slurm 18.08.9
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 99e3159917..0070854cc7 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -1869,6 +1869,7 @@ slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr)
}
req_msg.msg_type = REQUEST_DAEMON_STATUS;
req_msg.data = NULL;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
diff --git a/src/api/job_info.c b/src/api/job_info.c
index f5c300ee6d..3ebb0481cd 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -1472,6 +1472,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid)
req.job_pid = job_pid;
req_msg.msg_type = REQUEST_JOB_ID;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
@@ -1847,6 +1848,7 @@ slurm_network_callerid (network_callerid_msg_t req, uint32_t *job_id,
req_msg.msg_type = REQUEST_NETWORK_CALLERID;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0)
return SLURM_ERROR;
diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c
index 1efaf9a90b..72482eacec 100644
--- a/src/api/job_step_info.c
+++ b/src/api/job_step_info.c
@@ -618,6 +618,7 @@ extern int slurm_job_step_stat(uint32_t job_id, uint32_t step_id,
job_id, step_id, node_list);
slurm_msg_t_init(&req_msg);
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
memset(&req, 0, sizeof(req));
resp_out->job_id = req.job_id = job_id;
@@ -734,6 +735,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
job_id, step_id, node_list);
slurm_msg_t_init(&req_msg);
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
memset(&req, 0, sizeof(req));
resp_out->job_id = req.job_id = job_id;
diff --git a/src/api/node_info.c b/src/api/node_info.c
index 857711f5f2..4022fc70c0 100644
--- a/src/api/node_info.c
+++ b/src/api/node_info.c
@@ -858,6 +858,7 @@ extern int slurm_get_node_energy(char *host, uint16_t delta,
req.delta = delta;
req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c
index b150987113..54539f2044 100644
--- a/src/api/pmi_server.c
+++ b/src/api/pmi_server.c
@@ -141,6 +141,7 @@ static void *_msg_thread(void *x)
slurm_msg_t msg_send;
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
debug2("KVS_Barrier msg to %s:%hu",
msg_arg_ptr->bar_ptr->hostname,
diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c
index 389f0dd594..f4a49f9a81 100644
--- a/src/api/reconfigure.c
+++ b/src/api/reconfigure.c
@@ -157,6 +157,7 @@ static int _send_message_controller(int dest, slurm_msg_t *req)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
}
+ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
if (slurm_send_node_msg(fd, req) < 0) {
close(fd);
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
diff --git a/src/api/signal.c b/src/api/signal.c
index e5cda8dc98..a036cf0190 100644
--- a/src/api/signal.c
+++ b/src/api/signal.c
@@ -59,6 +59,7 @@ static int _local_send_recv_rc_msgs(const char *nodelist,
slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
slurm_msg_t_init(msg);
+ slurm_msg_set_r_uid(msg, SLURM_AUTH_UID_ANY);
msg->msg_type = type;
msg->data = data;
@@ -101,6 +102,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t
rpc.flags = KILL_JOB_BATCH;
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
msg.msg_type = REQUEST_SIGNAL_TASKS;
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
@@ -158,6 +160,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t
slurm_msg_t_init(&msg);
msg.msg_type = REQUEST_TERMINATE_TASKS;
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
diff --git a/src/api/slurm_pmi.c b/src/api/slurm_pmi.c
index c2c4dce054..4028105b26 100644
--- a/src/api/slurm_pmi.c
+++ b/src/api/slurm_pmi.c
@@ -178,6 +178,7 @@ int slurm_send_kvs_comm_set(kvs_comm_set_t *kvs_set_ptr,
_set_pmi_time();
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
msg_send.address = srun_addr;
msg_send.msg_type = PMI_KVS_PUT_REQ;
msg_send.data = (void *) kvs_set_ptr;
@@ -260,6 +261,7 @@ int slurm_get_kvs_comm_set(kvs_comm_set_t **kvs_set_ptr,
data.port = port;
data.hostname = hostname;
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
slurm_msg_t_init(&msg_rcv);
msg_send.address = srun_addr;
msg_send.msg_type = PMI_KVS_GET_REQ;
@@ -344,6 +346,7 @@ static int _forward_comm_set(kvs_comm_set_t *kvs_set_ptr)
if (kvs_set_ptr->kvs_host_ptr[i].port == 0)
continue; /* empty */
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
msg_send.msg_type = PMI_KVS_GET_RESP;
msg_send.data = (void *) kvs_set_ptr;
slurm_set_addr(&msg_send.address,
diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index 50b68ae124..7c7b821977 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -898,6 +898,7 @@ extern void slurm_step_launch_fwd_signal(slurm_step_ctx_t *ctx, int signo)
hostlist_destroy(hl);
RESEND: slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
req.msg_type = REQUEST_SIGNAL_TASKS;
req.data = &msg;
@@ -1722,6 +1723,7 @@ static int _launch_tasks(slurm_step_ctx_t *ctx,
}
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_LAUNCH_TASKS;
msg.data = launch_msg;
diff --git a/src/bcast/file_bcast.c b/src/bcast/file_bcast.c
index c44ef507ca..493493c6a7 100644
--- a/src/bcast/file_bcast.c
+++ b/src/bcast/file_bcast.c
@@ -190,6 +190,7 @@ static int _file_bcast(struct bcast_parameters *params,
slurm_msg_t msg;
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.data = bcast_msg;
msg.msg_type = REQUEST_FILE_BCAST;
diff --git a/src/common/forward.c b/src/common/forward.c
index 79206b2ae0..79a4f43977 100644
--- a/src/common/forward.c
+++ b/src/common/forward.c
@@ -247,7 +247,7 @@ void *_forward_thread(void *arg)
/* steps, fwd_msg->timeout); */
}
- ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
+ ret_list = slurm_receive_resp_msgs(fd, steps, fwd_msg->timeout);
/* info("sent %d forwards got %d back", */
/* fwd_msg->header.forward.cnt, list_count(ret_list)); */
@@ -355,6 +355,9 @@ void *_fwd_tree_thread(void *arg)
send_msg.msg_type = fwd_tree->orig_msg->msg_type;
send_msg.data = fwd_tree->orig_msg->data;
send_msg.protocol_version = fwd_tree->orig_msg->protocol_version;
+ if (fwd_tree->orig_msg->restrict_uid_set)
+ slurm_msg_set_r_uid(&send_msg,
+ fwd_tree->orig_msg->restrict_uid);
/* repeat until we are sure the message was sent */
while ((name = hostlist_shift(fwd_tree->tree_hl))) {
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index 6e7d9dfd91..556da92ad0 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -60,7 +60,8 @@ static bool init_run = false;
* end of the structure.
*/
typedef struct slurm_auth_ops {
- void * (*create) ( char *auth_info );
+ void * (*create) ( char *auth_info, uid_t r_uid,
+ void *data, int dlen); /* same argument list as g_slurm_auth_create() */
int (*destroy) ( void *cred );
int (*verify) ( void *cred, char *auth_info );
uid_t (*get_uid) ( void *cred, char *auth_info );
@@ -70,6 +71,9 @@ typedef struct slurm_auth_ops {
int (*print) ( void *cred, FILE *fp );
int (*sa_errno) ( void *cred );
const char * (*sa_errstr) ( int slurm_errno );
+ bool (*hash_enable);
+ int (*get_data) (void *cred, char **data,
+ uint32_t *len);
} slurm_auth_ops_t;
/*
* These strings must be kept in the same order as the fields
@@ -85,7 +89,9 @@ static const char *syms[] = {
"slurm_auth_unpack",
"slurm_auth_print",
"slurm_auth_errno",
- "slurm_auth_errstr"
+ "slurm_auth_errstr",
+ "hash_enable",
+ "auth_p_get_data"
};
/*
@@ -125,6 +131,15 @@ slurm_auth_generic_errstr( int slurm_errno )
}
}
+extern bool slurm_get_plugin_hash_enable(int index)
+{
+ /* If the auth layer cannot be initialised, report hashing as enabled. */
+ if (slurm_auth_init(NULL) < 0)
+ return true;
+ /* hash_enable is stored as a pointer to a bool, so dereference it. */
+ return *ops[index].hash_enable;
+}
+
extern int slurm_auth_init( char *auth_type )
{
int retval = SLURM_SUCCESS;
@@ -182,12 +197,13 @@ slurm_auth_fini( void )
* the API function dispatcher.
*/
-void *g_slurm_auth_create(char *auth_info)
+void *g_slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen)
{
if (slurm_auth_init(NULL) < 0)
return NULL;
- return (*(ops.create))(auth_info);
+ return (*(ops.create))(auth_info, r_uid, data, dlen);
}
int g_slurm_auth_destroy(void *cred)
@@ -206,6 +222,16 @@ int g_slurm_auth_verify(void *cred, char *auth_info)
return (*(ops.verify))(cred, auth_info);
}
+int auth_g_get_data(void *cred, char **data, uint32_t *len)
+{
+ /* cred is treated as a cred_wrapper_t; its index selects the ops entry. */
+ cred_wrapper_t *wrapper = cred;
+ if ((wrapper == NULL) || (slurm_auth_init(NULL) < 0))
+ return SLURM_ERROR;
+ /* Hand the request to the get_data operation of that entry. */
+ return ops[wrapper->index].get_data(cred, data, len);
+}
+
uid_t g_slurm_auth_get_uid(void *cred, char *auth_info)
{
if (slurm_auth_init(NULL) < 0)
diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h
index 84731c469e..d82cdf8775 100644
--- a/src/common/slurm_auth.h
+++ b/src/common/slurm_auth.h
@@ -88,6 +88,12 @@ enum {
ARG_COUNT,
};
+/*
+ * Wildcard restrict-uid: do not restrict credential decode to any uid.
+ * This should be equal to MUNGE_UID_ANY.
+ */
+#define SLURM_AUTH_UID_ANY (-1)
+
/*
* Slurm authentication context opaque type.
*/
@@ -128,15 +134,23 @@ extern int slurm_auth_init( char *auth_type );
*/
extern int slurm_auth_fini( void );
+/*
+ * Check if plugin type corresponding to the authentication
+ * plugin index supports hash.
+ */
+extern bool slurm_get_plugin_hash_enable(int index);
+
/*
* Static bindings for the global authentication context.
*/
-extern void * g_slurm_auth_create(char *auth_info);
+extern void * g_slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen);
extern int g_slurm_auth_destroy( void *cred );
extern int g_slurm_auth_verify(void *cred, char *auth_info);
extern uid_t g_slurm_auth_get_uid( void *cred, char *auth_info );
extern gid_t g_slurm_auth_get_gid( void *cred, char *auth_info );
extern int g_slurm_auth_pack( void *cred, Buf buf );
+extern int auth_g_get_data(void *cred, char **data, uint32_t *len);
/*
* WARNING! The returned auth pointer WILL have pointers
diff --git a/src/common/slurm_persist_conn.c b/src/common/slurm_persist_conn.c
index ea6c51bd4c..53e610d1c3 100644
--- a/src/common/slurm_persist_conn.c
+++ b/src/common/slurm_persist_conn.c
@@ -582,6 +582,7 @@ extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
if (persist_conn->flags & PERSIST_FLAG_DBD)
req_msg.flags |= SLURMDBD_CONNECTION;
+ slurm_msg_set_r_uid(&req_msg, persist_conn->r_uid);
memset(&req, 0, sizeof(persist_init_req_msg_t));
req.cluster_name = persist_conn->cluster_name;
diff --git a/src/common/slurm_persist_conn.h b/src/common/slurm_persist_conn.h
index e581fef101..a12499d460 100644
--- a/src/common/slurm_persist_conn.h
+++ b/src/common/slurm_persist_conn.h
@@ -74,6 +74,7 @@ typedef struct {
uint16_t flags;
bool inited;
persist_conn_type_t persist_type;
+ uid_t r_uid;
char *rem_host;
uint16_t rem_port;
time_t *shutdown;
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 33a554b7b0..f9ea5e5a05 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -906,6 +906,36 @@ char *slurm_get_priority_weight_tres(void)
return weights;
}
+static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
+ void *cred)
+{
+ /* Check the credential payload against msg->msg_type; returns auth_g_get_data()'s rc, or SLURM_ERROR if rejected. */
+ char *cred_hash = NULL;
+ uint32_t cred_hash_len = 0;
+ int rc;
+ static time_t config_update = (time_t) -1;
+ static bool block_null_hash = true;
+
+ /* Refresh the cached block_null_hash setting whenever slurm_conf.last_update changes. */
+ if (config_update != slurm_conf.last_update) {
+ block_null_hash = (xstrcasestr(slurm_conf.comm_params,
+ "block_null_hash"));
+ config_update = slurm_conf.last_update;
+ }
+ rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len);
+ /* Accepted payload: 3 bytes, a leading byte of 1 followed by msg_type; a length without data is rejected. */
+ if (cred_hash || cred_hash_len) {
+ if (!cred_hash || cred_hash_len != 3 || cred_hash[0] != 1 ||
+ memcmp(cred_hash + 1,
+ &msg->msg_type, sizeof(msg->msg_type)))
+ rc = SLURM_ERROR;
+ } else if (block_null_hash &&
+ slurm_get_plugin_hash_enable(msg->auth_index))
+ rc = SLURM_ERROR;
+ xfree(cred_hash);
+ return rc;
+}
+
static int _get_tres_id(char *type, char *name)
{
slurmdb_tres_rec_t tres_rec;
@@ -3299,6 +3329,9 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
goto total_return;
}
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg->auth_uid_set = true;
+
/*
* Unpack message body
*/
@@ -3309,6 +3342,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
msg->body_offset = get_buf_offset(buffer);
if ((header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, msg, auth_cred) ||
(unpack_msg(msg, buffer) != SLURM_SUCCESS)) {
rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
(void) g_slurm_auth_destroy(auth_cred);
@@ -3409,6 +3443,8 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout)
*/
if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
rc = errno;
+ if (!rc)
+ rc = SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
goto endit;
}
@@ -3571,6 +3607,8 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
goto total_return;
}
+ msg.auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg.auth_uid_set = true;
/*
* Unpack message body
*/
@@ -3579,6 +3617,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
msg.flags = header.flags;
if ((header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, &msg, auth_cred) ||
(unpack_msg(&msg, buffer) != SLURM_SUCCESS)) {
(void) g_slurm_auth_destroy(auth_cred);
free_buf(buffer);
@@ -3620,6 +3659,155 @@ total_return:
}
+List slurm_receive_resp_msgs(int fd, int steps, int timeout)
+{
+ /* Read one response from fd; return it in a list with any results carried in the header. errno is set to rc. */
+ char *buf = NULL;
+ size_t buflen = 0;
+ header_t header;
+ int rc;
+ void *auth_cred = NULL;
+ slurm_msg_t msg;
+ buf_t *buffer;
+ ret_data_info_t *ret_data_info = NULL;
+ List ret_list = NULL;
+ int orig_timeout = timeout;
+
+ xassert(fd >= 0);
+
+ slurm_msg_t_init(&msg);
+ msg.conn_fd = fd;
+
+ if (timeout <= 0) {
+ /* convert secs to msec */
+ timeout = slurm_conf.msg_timeout * 1000;
+ orig_timeout = timeout;
+ }
+ if (steps) {
+ if (message_timeout < 0)
+ message_timeout = slurm_conf.msg_timeout * 1000;
+ orig_timeout = (timeout -
+ (message_timeout*(steps-1)))/steps;
+ steps--;
+ }
+
+ log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d",
+ __func__, orig_timeout, steps, timeout);
+ /* we compare to the orig_timeout here because that is really
+ * what we are going to wait for each step
+ */
+ if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) {
+ log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
+ __func__, (slurm_conf.msg_timeout * 10),
+ (timeout/1000));
+ } else if (orig_timeout < 1000) {
+ log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
+ __func__, timeout, orig_timeout);
+ }
+
+ /*
+ * Receive a msg. slurm_msg_recvfrom() will read the message
+ * length and allocate space on the heap for a buffer containing
+ * the message.
+ */
+ if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
+ /* errno may be 0 here; never report a failed read as success */
+ rc = errno ? errno : SLURM_COMMUNICATIONS_RECEIVE_ERROR;
+ forward_init(&header.forward);
+ goto total_return;
+ }
+
+ log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__);
+ buffer = create_buf(buf, buflen);
+
+ if (unpack_header(&header, buffer) == SLURM_ERROR) {
+ free_buf(buffer);
+ rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
+ goto total_return;
+ }
+
+ if (check_header_version(&header) < 0) {
+ slurm_addr_t resp_addr;
+ if (!slurm_get_peer_addr(fd, &resp_addr)) {
+ error("%s: Invalid Protocol Version %u from at %pA",
+ __func__, header.version, &resp_addr);
+ } else {
+ error("%s: Invalid Protocol Version %u from problem connection: %m",
+ __func__, header.version);
+ }
+
+ free_buf(buffer);
+ rc = SLURM_PROTOCOL_VERSION_ERROR;
+ goto total_return;
+ }
+ //info("ret_cnt = %d",header.ret_cnt);
+ if (header.ret_cnt > 0) {
+ if (header.ret_list)
+ ret_list = header.ret_list;
+ else
+ ret_list = list_create(destroy_data_info);
+ header.ret_cnt = 0;
+ header.ret_list = NULL;
+ }
+
+ /* Forward message to other nodes */
+ if (header.forward.cnt > 0) {
+ error("%s: We need to forward this to other nodes use slurm_receive_msg_and_forward instead",
+ __func__);
+ }
+
+ if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) {
+ error("%s: auth_g_unpack: %m", __func__);
+ free_buf(buffer);
+ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
+ goto total_return;
+ }
+ g_slurm_auth_destroy(auth_cred);
+ /*
+ * The credential is unpacked and destroyed without verification; unpack message body
+ */
+ msg.protocol_version = header.version;
+ msg.msg_type = header.msg_type;
+ msg.flags = header.flags;
+
+ if ((header.body_length > remaining_buf(buffer)) ||
+ (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) {
+ free_buf(buffer);
+ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
+ goto total_return;
+ }
+ free_buf(buffer);
+ rc = SLURM_SUCCESS;
+
+total_return:
+ destroy_forward(&header.forward);
+
+ if (rc != SLURM_SUCCESS) {
+ if (ret_list) {
+ ret_data_info = xmalloc(sizeof(ret_data_info_t));
+ ret_data_info->err = rc;
+ ret_data_info->type = RESPONSE_FORWARD_FAILED;
+ ret_data_info->data = NULL;
+ list_push(ret_list, ret_data_info);
+ }
+
+ error("%s: failed: %s",
+ __func__, slurm_strerror(rc));
+ usleep(10000); /* Discourage brute force attack */
+ } else {
+ if (!ret_list)
+ ret_list = list_create(destroy_data_info);
+ ret_data_info = xmalloc(sizeof(ret_data_info_t));
+ ret_data_info->err = rc;
+ ret_data_info->node_name = NULL;
+ ret_data_info->type = msg.msg_type;
+ ret_data_info->data = msg.data;
+ list_push(ret_list, ret_data_info);
+ }
+
+ errno = rc;
+ return ret_list;
+}
/* try to determine the UID associated with a message with different
* message header version, return -1 if we can't tell */
static int _unpack_msg_uid(Buf buffer)
@@ -3805,6 +3993,9 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
goto total_return;
}
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg->auth_uid_set = true;
+
/*
* Unpack message body
*/
@@ -3818,6 +4009,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
}
if ( (header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, msg, auth_cred) ||
(unpack_msg(msg, buffer) != SLURM_SUCCESS) ) {
(void) g_slurm_auth_destroy(auth_cred);
free_buf(buffer);
@@ -3885,6 +4077,7 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
int rc;
void * auth_cred;
time_t start_time = time(NULL);
+ unsigned char auth_payload[3] = { 1 }; /* uint8_t + uint16_t (msg_type) */
if (msg->conn) {
persist_msg_t persist_msg;
@@ -3920,6 +4113,9 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
return rc;
}
+ if (!msg->restrict_uid_set)
+ fatal("%s: restrict_uid is not set", __func__);
+ memcpy(auth_payload + 1, &msg->msg_type, sizeof(msg->msg_type));
/*
* Initialize header with Auth credential and message type.
* We get the credential now rather than later so the work can
@@ -3928,10 +4124,14 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
* wait too long for the incoming message.
*/
if (msg->flags & SLURM_GLOBAL_AUTH_KEY) {
- auth_cred = g_slurm_auth_create(_global_auth_key());
+ auth_cred = g_slurm_auth_create(_global_auth_key(),
+ msg->restrict_uid, auth_payload,
+ sizeof(auth_payload));
} else {
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ msg->restrict_uid, auth_payload,
+ sizeof(auth_payload));
xfree(auth_info);
}
@@ -3948,10 +4148,16 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
if (difftime(time(NULL), start_time) >= 60) {
(void) g_slurm_auth_destroy(auth_cred);
if (msg->flags & SLURM_GLOBAL_AUTH_KEY) {
- auth_cred = g_slurm_auth_create(_global_auth_key());
+ auth_cred = g_slurm_auth_create(_global_auth_key(),
+ msg->restrict_uid,
+ auth_payload,
+ sizeof(auth_payload));
} else {
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ msg->restrict_uid,
+ auth_payload,
+ sizeof(auth_payload));
xfree(auth_info);
}
}
@@ -4187,6 +4393,24 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg,
resp_msg->protocol_version = msg->protocol_version;
resp_msg->ret_list = msg->ret_list;
resp_msg->orig_addr = msg->orig_addr;
+ /*
+ * Extra sanity check. This should always be set. But if for some
+ * reason it isn't, restrict the decode to avoid leaking an
+ * unrestricted authentication token.
+ *
+ * Implicitly trust communications initiated by SlurmUser and
+ * SlurmdUser. In future releases this won't matter - there's
+ * no point packing an auth token on the reply as it isn't checked,
+ * but we're stuck doing that on older protocol versions for
+ * backwards-compatibility.
+ */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_conf.slurm_user_id) &&
+ (msg->auth_uid != slurm_conf.slurmd_user_id))
+ slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY);
}
static void _rc_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg,
@@ -4487,6 +4711,7 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg,
forward_init(&request_msg->forward, NULL);
request_msg->ret_list = NULL;
request_msg->forward_struct = NULL;
+ slurm_msg_set_r_uid(request_msg, SLURM_AUTH_UID_ANY);
tryagain:
retry = 1;
@@ -4615,6 +4840,8 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req,
goto cleanup;
}
+ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+
if ((rc = slurm_send_node_msg(fd, req)) < 0) {
rc = SLURM_ERROR;
} else {
@@ -4995,6 +5222,12 @@ extern void slurm_free_msg(slurm_msg_t *msg)
}
}
+extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid)
+{
+ msg->restrict_uid_set = true; /* record that a restriction was chosen explicitly */
+ msg->restrict_uid = r_uid; /* later passed as r_uid when the auth credential is created */
+}
+
extern char *nodelist_nth_host(const char *nodelist, int inx)
{
hostlist_t hl = hostlist_create(nodelist);
@@ -5193,6 +5426,7 @@ extern int slurm_forward_data(
req.len = len;
req.data = (char *)data;
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_FORWARD_DATA;
msg.data = &req;
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 49066aa7f0..826de757d5 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -1041,6 +1041,7 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout);
* errno set.
*/
List slurm_receive_msgs(int fd, int steps, int timeout);
+List slurm_receive_resp_msgs(int fd, int steps, int timeout);
/*
* Receive a slurm message on the open slurm descriptor "fd" waiting
@@ -1351,6 +1352,8 @@ extern int *set_span(int total, uint16_t tree_width);
extern void slurm_free_msg_members(slurm_msg_t *msg);
extern void slurm_free_msg(slurm_msg_t * msg);
+extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid);
+
/* must free this memory with free not xfree */
extern char *nodelist_nth_host(const char *nodelist, int inx);
extern int nodelist_find(const char *nodelist, const char *name);
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 54f32f5a16..b864ab56c6 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -51,6 +51,7 @@
#include "src/common/power.h"
#include "src/common/slurm_accounting_storage.h"
#include "src/common/slurm_acct_gather_energy.h"
+#include "src/common/slurm_auth.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_ext_sensors.h"
#include "src/common/slurm_jobacct_gather.h"
@@ -100,6 +101,7 @@ extern void slurm_msg_t_init(slurm_msg_t *msg)
{
memset(msg, 0, sizeof(slurm_msg_t));
+ msg->auth_uid = SLURM_AUTH_NOBODY;
msg->conn_fd = -1;
msg->msg_type = NO_VAL16;
msg->protocol_version = NO_VAL16;
@@ -129,6 +131,8 @@ extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src)
dest->ret_list = src->ret_list;
dest->forward_struct = src->forward_struct;
dest->orig_addr.sin_addr.s_addr = 0;
+ if (src->auth_uid_set)
+ slurm_msg_set_r_uid(dest, src->auth_uid);
return;
}
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 9daaecb3b5..40980b7f13 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -59,6 +59,7 @@
#include "src/common/job_options.h"
#include "src/common/list.h"
#include "src/common/macros.h"
+#include "src/common/slurm_auth.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_protocol_common.h"
#include "src/common/slurm_persist_conn.h"
@@ -487,6 +488,19 @@ typedef struct slurm_protocol_config {
typedef struct slurm_msg {
slurm_addr_t address;
void *auth_cred;
+ uid_t auth_uid; /* NEVER PACK. Authenticated uid from auth
+ * credential. Only valid if auth_uid_set is
+ * true. Set to SLURM_AUTH_NOBODY if not set
+ * yet.
+ */
+ bool auth_uid_set; /* NEVER PACK. True when auth_uid has been set.
+ * This is a safety measure against handling
+ * a slurm_msg_t that has been xmalloc()'d but
+ * slurm_msg_t_init() was not called since
+ * auth_uid would be root.
+ */
+ uid_t restrict_uid;
+ bool restrict_uid_set;
uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of
buffer starts. */
Buf buffer; /* DON't PACK! ptr to buffer that msg was unpacked from. */
diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c
index 4acb79d6aa..a56302a23d 100644
--- a/src/common/slurmdb_defs.c
+++ b/src/common/slurmdb_defs.c
@@ -2942,6 +2942,7 @@ extern int slurmdb_send_accounting_update(List update_list, char *cluster,
slurm_set_addr_char(&req.address, port, host);
req.protocol_version = rpc_version;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
req.msg_type = ACCOUNTING_UPDATE_MSG;
if (slurmdbd_conf)
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 98deb070e0..8f68a09e28 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -450,7 +450,7 @@ rwfail:
int
stepd_attach(int fd, uint16_t protocol_version,
slurm_addr_t *ioaddr, slurm_addr_t *respaddr,
- void *job_cred_sig, reattach_tasks_response_msg_t *resp)
+ void *job_cred_sig, uid_t uid, reattach_tasks_response_msg_t *resp)
{
int req = REQUEST_ATTACH;
int rc = SLURM_SUCCESS;
@@ -460,6 +460,7 @@ stepd_attach(int fd, uint16_t protocol_version,
safe_write(fd, ioaddr, sizeof(slurm_addr_t));
safe_write(fd, respaddr, sizeof(slurm_addr_t));
safe_write(fd, job_cred_sig, SLURM_IO_KEY_SIZE);
+ safe_write(fd, &uid, sizeof(uid_t));
safe_write(fd, &protocol_version, sizeof(uint16_t));
} else {
int proto = protocol_version;
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index fce6add26a..0cdde642d0 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -177,7 +177,8 @@ int stepd_signal_container(int fd, uint16_t protocol_version, int signal,
*/
int stepd_attach(int fd, uint16_t protocol_version,
slurm_addr_t *ioaddr, slurm_addr_t *respaddr,
- void *job_cred_sig, reattach_tasks_response_msg_t *resp);
+ void *job_cred_sig, uid_t uid,
+ reattach_tasks_response_msg_t *resp);
/*
* Scan for available running slurm step daemons by checking
diff --git a/src/plugins/accounting_storage/common/common_as.c b/src/plugins/accounting_storage/common/common_as.c
index 892ca11a7d..4c00809fa7 100644
--- a/src/plugins/accounting_storage/common/common_as.c
+++ b/src/plugins/accounting_storage/common/common_as.c
@@ -389,6 +389,7 @@ extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version)
out_msg.msg_type = ACCOUNTING_FIRST_REG;
out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
out_msg.data = &update;
+ slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY);
slurm_send_node_msg(fd, &out_msg);
/* We probably need to add matching recv_msg function
* for an arbitray fd or should these be fire
diff --git a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
index feb29111cd..52ebf46964 100644
--- a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
+++ b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
@@ -506,6 +506,7 @@ static void _open_slurmdbd_conn(bool need_db)
slurmdbd_conn->rem_port = slurm_get_accounting_storage_port();
+ slurmdbd_conn->r_uid = SLURM_AUTH_UID_ANY;
if (!slurmdbd_conn->rem_port) {
slurmdbd_conn->rem_port = SLURMDBD_PORT;
slurm_set_accounting_storage_port(
diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c
index 5b7d96fd3b..d7e14d5c8b 100644
--- a/src/plugins/auth/munge/auth_munge.c
+++ b/src/plugins/auth/munge/auth_munge.c
@@ -84,6 +84,7 @@
const char plugin_name[] = "Munge authentication plugin";
const char plugin_type[] = "auth/munge";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
+bool hash_enable = true;
static int plugin_errno = SLURM_SUCCESS;
static int bad_cred_test = -1;
@@ -102,12 +103,12 @@ typedef struct _slurm_auth_credential {
int magic; /* magical munge validity magic */
#endif
char *m_str; /* munged string */
- void *buf; /* Application specific data */
bool verified; /* true if this cred has been verified */
- int len; /* amount of App data */
uid_t uid; /* UID. valid only if verified == true */
gid_t gid; /* GID. valid only if verified == true */
int cr_errno;
+ void *data; /* payload data */
+ int dlen; /* payload data length */
} slurm_auth_credential_t;
/*
@@ -154,7 +155,8 @@ int init ( void )
* allocate a credential. Whether the credential is populated with useful
* data at this time is implementation-dependent.
*/
-slurm_auth_credential_t *slurm_auth_create(char *opts)
+slurm_auth_credential_t *slurm_auth_create(char *opts, uid_t r_uid,
+ void *data, int dlen)
{
int rc, retry = RETRY_COUNT, auth_ttl;
slurm_auth_credential_t *cred = NULL;
@@ -191,6 +193,13 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
}
}
+ rc = munge_ctx_set(ctx, MUNGE_OPT_UID_RESTRICTION, r_uid);
+ if (rc != EMUNGE_SUCCESS) {
+ error("munge_ctx_set failure");
+ munge_ctx_destroy(ctx);
+ return NULL;
+ }
+
auth_ttl = slurm_get_auth_ttl();
if (auth_ttl)
(void) munge_ctx_set(ctx, MUNGE_OPT_TTL, auth_ttl);
@@ -198,8 +207,8 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
cred = xmalloc(sizeof(*cred));
cred->verified = false;
cred->m_str = NULL;
- cred->buf = NULL;
- cred->len = 0;
+ cred->data = NULL;
+ cred->dlen = 0;
cred->cr_errno = SLURM_SUCCESS;
xassert((cred->magic = MUNGE_MAGIC));
@@ -213,7 +222,7 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
ohandler = xsignal(SIGALRM, (SigFunc *)SIG_BLOCK);
again:
- err = munge_encode(&cred->m_str, ctx, cred->buf, cred->len);
+ err = munge_encode(&cred->m_str, ctx, data, dlen);
if (err != EMUNGE_SUCCESS) {
if ((err == EMUNGE_SOCKET) && retry--) {
debug("Munge encode failed: %s (retrying ...)",
@@ -258,8 +267,8 @@ slurm_auth_destroy( slurm_auth_credential_t *cred )
*/
if (cred->m_str)
free(cred->m_str);
- if (cred->buf)
- free(cred->buf);
+ if (cred->data)
+ free(cred->data);
xfree(cred);
return SLURM_SUCCESS;
@@ -351,6 +360,34 @@ slurm_auth_get_gid( slurm_auth_credential_t *cred, char *opts )
return cred->gid;
}
+/*
+ * Copy the payload of an already-verified credential into *data and *len.
+ */
+int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len)
+{
+ if (!cred || !cred->verified) {
+ /*
+ * This xassert will trigger on a development build if
+ * the calling path did not verify the credential first.
+ */
+ xassert(!cred);
+ slurm_seterrno(ESLURM_AUTH_BADARG);
+ return SLURM_ERROR;
+ }
+
+ xassert(cred->magic == MUNGE_MAGIC);
+
+ if (cred->data && cred->dlen) { /* payload present */
+ *data = xmalloc(cred->dlen); /* caller gets its own copy */
+ memcpy(*data, cred->data, cred->dlen);
+ *len = cred->dlen;
+ } else { /* no payload: report empty, still success */
+ *data = NULL;
+ *len = 0;
+ }
+ return SLURM_SUCCESS;
+}
+
/*
* Marshall a credential for transmission over the network, according to
* Slurm's marshalling protocol.
@@ -418,8 +455,6 @@ slurm_auth_unpack( Buf buf )
cred = xmalloc(sizeof(*cred));
cred->verified = false;
cred->m_str = NULL;
- cred->buf = NULL;
- cred->len = 0;
cred->cr_errno = SLURM_SUCCESS;
xassert((cred->magic = MUNGE_MAGIC));
@@ -521,14 +556,9 @@ _decode_cred(slurm_auth_credential_t *c, char *socket)
return SLURM_ERROR;
}
- again:
- c->buf = NULL;
- err = munge_decode(c->m_str, ctx, &c->buf, &c->len, &c->uid, &c->gid);
+again:
+ err = munge_decode(c->m_str, ctx, &c->data, &c->dlen, &c->uid, &c->gid);
if (err != EMUNGE_SUCCESS) {
- if (c->buf) {
- free(c->buf);
- c->buf = NULL;
- }
if ((err == EMUNGE_SOCKET) && retry--) {
debug("Munge decode failed: %s (retrying ...)",
munge_ctx_strerror(ctx));
diff --git a/src/plugins/auth/none/auth_none.c b/src/plugins/auth/none/auth_none.c
index 9fbc1dbb0d..843469935b 100644
--- a/src/plugins/auth/none/auth_none.c
+++ b/src/plugins/auth/none/auth_none.c
@@ -74,6 +74,7 @@
const char plugin_name[] = "Null authentication plugin";
const char plugin_type[] = "auth/none";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
+bool hash_enable = false;
/*
* An opaque type representing authentication credentials. This type can be
@@ -148,7 +149,8 @@ extern int fini ( void )
* Allocate and initializes a credential. This function should return
* NULL if it cannot allocate a credential.
*/
-slurm_auth_credential_t *slurm_auth_create(char *auth_info)
+slurm_auth_credential_t *slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen)
{
slurm_auth_credential_t *cred;
cred = xmalloc(sizeof(slurm_auth_credential_t));
@@ -214,6 +216,19 @@ slurm_auth_get_gid( slurm_auth_credential_t *cred, char *auth_info )
}
}
+int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len)
+{
+ if (!cred) { /* only a NULL credential is rejected here */
+ slurm_seterrno(ESLURM_AUTH_BADARG);
+ return SLURM_ERROR;
+ }
+
+ *data = NULL; /* this plugin always reports an empty payload */
+ *len = 0;
+
+ return SLURM_SUCCESS;
+}
+
/*
* Marshall a credential for transmission over the network, according to
* Slurm's marshalling protocol.
diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c
index 30b1d26270..b5f7d7521e 100644
--- a/src/plugins/mpi/pmi2/setup.c
+++ b/src/plugins/mpi/pmi2/setup.c
@@ -106,6 +106,8 @@ _setup_stepd_job_info(const stepd_step_rec_t *job, char ***env)
memset(&job_info, 0, sizeof(job_info));
+ job_info.uid = job->uid;
+
if (job->pack_jobid && (job->pack_jobid != NO_VAL)) {
job_info.jobid = job->pack_jobid;
job_info.stepid = job->stepid;
diff --git a/src/plugins/mpi/pmi2/setup.h b/src/plugins/mpi/pmi2/setup.h
index 6f25d372e7..e329b5d215 100644
--- a/src/plugins/mpi/pmi2/setup.h
+++ b/src/plugins/mpi/pmi2/setup.h
@@ -59,6 +59,7 @@
typedef struct pmi2_job_info {
uint32_t jobid; /* Current Slurm job id */
uint32_t stepid; /* Current step id (or NO_VAL) */
+ uid_t uid; /* user id for job */
uint32_t nnodes; /* number of nodes in current job step */
uint32_t nodeid; /* relative position of this node in job */
uint32_t ntasks; /* total number of tasks in current job */
diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c
index 3b2b85bb16..b3fc6294b1 100644
--- a/src/plugins/mpi/pmi2/spawn.c
+++ b/src/plugins/mpi/pmi2/spawn.c
@@ -151,7 +151,8 @@ spawn_req_pack(spawn_req_t *req, Buf buf)
void *auth_cred;
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ job_info.uid, NULL, 0);
xfree(auth_info);
if (auth_cred == NULL) {
error("authentication: %s",
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c
index e8524a8ce3..594230bfc1 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.c
+++ b/src/plugins/mpi/pmix/pmixp_dconn.c
@@ -79,6 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
_pmixp_dconn_conns[i].nodeid = i;
_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
+ _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id;
}
return SLURM_SUCCESS;
}
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.h b/src/plugins/mpi/pmix/pmixp_dconn.h
index 77f302f103..7e34fbdad2 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.h
+++ b/src/plugins/mpi/pmix/pmixp_dconn.h
@@ -82,6 +82,9 @@ typedef struct {
/* remote node info */
int nodeid;
void *priv;
+
+ /* authenticated uid on remote */
+ uid_t uid;
} pmixp_dconn_t;
typedef void *(*pmixp_dconn_p2p_init_t)(int nodeid,
diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c
index 0bc10376c3..944ab865ae 100644
--- a/src/plugins/mpi/pmix/pmixp_server.c
+++ b/src/plugins/mpi/pmix/pmixp_server.c
@@ -494,13 +494,14 @@ void pmixp_server_cleanup(void)
* --------------------- Authentication functionality -------------------
*/
-static int _auth_cred_create(Buf buf)
+static int _auth_cred_create(Buf buf, uid_t uid)
{
void *auth_cred = NULL;
char *auth_info = slurm_get_auth_info();
int rc = SLURM_SUCCESS;
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ uid, NULL, 0);
xfree(auth_info);
if (!auth_cred) {
rc = g_slurm_auth_errno(NULL);
@@ -519,7 +520,7 @@ static int _auth_cred_create(Buf buf)
return rc;
}
-static int _auth_cred_verify(Buf buf)
+static int _auth_cred_verify(Buf buf, uid_t *uid)
{
void *auth_cred = NULL;
char *auth_info = NULL;
@@ -536,9 +537,19 @@ static int _auth_cred_verify(Buf buf)
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
- if (rc)
+ if (rc) {
PMIXP_ERROR("Verifying authentication credential: %s",
g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
+ } else {
+ uid_t auth_uid;
+ auth_uid = g_slurm_auth_get_uid(auth_cred);
+ if ((auth_uid != slurm_conf.slurmd_user_id) &&
+ (auth_uid != _pmixp_job_info.uid)) {
+ PMIXP_ERROR("Credential from uid %u", auth_uid);
+ rc = SLURM_ERROR;
+ }
+ *uid = auth_uid;
+ }
g_slurm_auth_destroy(auth_cred);
return rc;
}
@@ -705,7 +716,7 @@ static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
pmixp_base_hdr_t bhdr;
init_msg = xmalloc(sizeof(*init_msg));
- rc = _auth_cred_create(buf_init);
+ rc = _auth_cred_create(buf_init, dconn->uid);
if (rc) {
FREE_NULL_BUFFER(init_msg->buf_ptr);
xfree(init_msg);
@@ -1159,6 +1170,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
Buf buf_msg;
int rc;
char *nodename = NULL;
+ uid_t uid = SLURM_AUTH_NOBODY;
if (!hdr->ext_flag) {
nodename = pmixp_info_job_host(hdr->nodeid);
@@ -1182,7 +1194,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
return;
}
/* Unpack and verify the auth credential */
- rc = _auth_cred_verify(buf_msg);
+ rc = _auth_cred_verify(buf_msg, &uid);
FREE_NULL_BUFFER(buf_msg);
if (rc) {
close(fd);
@@ -1206,6 +1218,9 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
xfree(nodename);
return;
}
+
+ dconn->uid = uid;
+
new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
pmixp_dconn_engine(dconn),
_direct_new_msg_conn,
diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c
index d1f8929fe2..bb89fc1133 100644
--- a/src/plugins/mpi/pmix/pmixp_utils.c
+++ b/src/plugins/mpi/pmix/pmixp_utils.c
@@ -404,6 +404,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address,
msg.forward.timeout = timeout;
msg.forward.cnt = 0;
msg.forward.nodelist = NULL;
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout);
if (!ret_list) {
/* This should never happen (when this was
diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c
index 66fd8ee876..5d12ba1225 100644
--- a/src/sattach/sattach.c
+++ b/src/sattach/sattach.c
@@ -405,6 +405,7 @@ static int _attach_to_tasks(uint32_t jobid,
reattach_msg.io_port = io_ports;
reattach_msg.cred = fake_cred;
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_REATTACH_TASKS;
msg.data = &reattach_msg;
msg.protocol_version = layout->start_protocol_ver;
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index a8d7cc0d38..a67e90ef3b 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -144,6 +144,7 @@ typedef struct agent_info {
uint16_t retry; /* if set, keep trying */
thd_t *thread_struct; /* thread structures */
bool get_reply; /* flag if reply expected */
+ uid_t r_uid; /* receiver UID */
slurm_msg_type_t msg_type; /* RPC to be issued */
void **msg_args_pptr; /* RPC data to be used */
uint16_t protocol_version; /* if set, use this version */
@@ -157,6 +158,7 @@ typedef struct task_info {
uint32_t *threads_active_ptr; /* currently active thread ptr */
thd_t *thread_struct_ptr; /* thread structures ptr */
bool get_reply; /* flag if reply expected */
+ uid_t r_uid; /* receiver UID */
slurm_msg_type_t msg_type; /* RPC to be issued */
void *msg_args_ptr; /* ptr to RPC data to be used */
uint16_t protocol_version; /* if set, use this version */
@@ -422,6 +424,11 @@ static int _valid_agent_arg(agent_arg_t *agent_arg_ptr)
__func__, agent_arg_ptr->node_count, hostlist_cnt);
return SLURM_FAILURE; /* no messages to be sent */
}
+ if (!agent_arg_ptr->r_uid_set) {
+ error("%s: r_uid not set for message:%u ",
+ __func__, agent_arg_ptr->msg_type);
+ return SLURM_ERROR;
+ }
return SLURM_SUCCESS;
}
@@ -444,6 +451,7 @@ static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr)
thread_ptr = xmalloc(agent_info_ptr->thread_count * sizeof(thd_t));
memset(thread_ptr, 0, (agent_info_ptr->thread_count * sizeof(thd_t)));
agent_info_ptr->thread_struct = thread_ptr;
+ agent_info_ptr->r_uid = agent_arg_ptr->r_uid;
agent_info_ptr->msg_type = agent_arg_ptr->msg_type;
agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args;
agent_info_ptr->protocol_version = agent_arg_ptr->protocol_version;
@@ -527,6 +535,7 @@ static task_info_t *_make_task_data(agent_info_t *agent_info_ptr, int inx)
task_info_ptr->threads_active_ptr= &agent_info_ptr->threads_active;
task_info_ptr->thread_struct_ptr = &agent_info_ptr->thread_struct[inx];
task_info_ptr->get_reply = agent_info_ptr->get_reply;
+ task_info_ptr->r_uid = agent_info_ptr->r_uid;
task_info_ptr->msg_type = agent_info_ptr->msg_type;
task_info_ptr->msg_args_ptr = *agent_info_ptr->msg_args_pptr;
task_info_ptr->protocol_version = agent_info_ptr->protocol_version;
@@ -924,6 +933,7 @@ static void *_thread_per_group_rpc(void *args)
msg.msg_type = msg_type;
msg.data = task_ptr->msg_args_ptr;
+ slurm_msg_set_r_uid(&msg, task_ptr->r_uid);
#if 0
info("%s: sending %s to %s", __func__, rpc_num2string(msg_type),
thread_ptr->nodelist);
@@ -1292,6 +1302,8 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count)
agent_arg_ptr->msg_args = *(agent_info_ptr->msg_args_pptr);
*(agent_info_ptr->msg_args_pptr) = NULL;
+ set_agent_arg_r_uid(agent_arg_ptr, agent_info_ptr->r_uid);
+
j = 0;
for (i = 0; i < agent_info_ptr->thread_count; i++) {
if (!thread_ptr[i].ret_list) {
@@ -2190,3 +2202,10 @@ extern int retry_list_size(void)
return 0;
return list_count(retry_list);
}
+
+/* Record the receiver UID in agent_arg and flag it as explicitly set */
+extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid)
+{
+ agent_arg_ptr->r_uid = r_uid;
+ agent_arg_ptr->r_uid_set = true; /* checked by _valid_agent_arg() */
+}
diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h
index afaa82fd1c..ce7809a056 100644
--- a/src/slurmctld/agent.h
+++ b/src/slurmctld/agent.h
@@ -54,6 +54,8 @@ typedef struct agent_arg {
uint32_t node_count; /* number of nodes to communicate
* with */
uint16_t retry; /* if set, keep trying */
+ uid_t r_uid; /* receiver UID */
+ bool r_uid_set; /* True if receiver UID set*/
slurm_addr_t *addr; /* if set will send to this
addr not hostlist */
hostlist_t hostlist; /* hostlist containing the
@@ -111,4 +113,7 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type);
/* Return length of agent's retry_list */
extern int retry_list_size(void);
+/* Set r_uid of agent_arg */
+extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid);
+
#endif /* !_AGENT_H */
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index c8fe17aeee..e9ba2849e9 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -343,7 +343,7 @@ static void *_background_rpc_mgr(void *no_data)
int newsockfd, sockfd;
slurm_addr_t cli_addr;
slurm_msg_t msg;
- int error_code;
+ int error_code = SLURM_ERROR;
/* Read configuration only */
slurmctld_lock_t config_read_lock = {
@@ -393,8 +393,9 @@ static void *_background_rpc_mgr(void *no_data)
slurm_msg_t_init(&msg);
if (slurm_receive_msg(newsockfd, &msg, 0) != 0)
error("slurm_receive_msg: %m");
+ else
+ error_code = _background_process_msg(&msg);
- error_code = _background_process_msg(&msg);
if ((error_code == SLURM_SUCCESS) &&
(msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
(slurmctld_config.shutdown_time == 0))
@@ -440,6 +441,10 @@ static int _background_process_msg(slurm_msg_t *msg)
int error_code = SLURM_SUCCESS;
bool send_rc = true;
+ if (!msg->auth_uid_set)
+ fatal("%s: received message without previously validated auth",
+ __func__);
+
if (msg->msg_type != REQUEST_PING) {
bool super_user = false;
char *auth_info = slurm_get_auth_info();
@@ -493,6 +498,7 @@ static void *_ping_ctld_thread(void *arg)
slurm_msg_t_init(&req);
slurm_set_addr(&req.address, ping->slurmctld_port, ping->control_addr);
req.msg_type = REQUEST_CONTROL_STATUS;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_node_msg(&req, &resp, 0) == SLURM_SUCCESS) {
switch (resp.msg_type) {
case RESPONSE_CONTROL_STATUS:
@@ -627,6 +633,7 @@ static void *_shutdown_controller(void *arg)
xfree(arg);
slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id);
slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
slurmctld_conf.control_addr[shutdown_inx]);
req.msg_type = REQUEST_CONTROL;
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 5b6b4d0e7e..20a7585f34 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1868,6 +1868,7 @@ static void _queue_reboot_msg(void)
reboot_agent_args->hostlist);
debug("Queuing reboot request for nodes %s", host_str);
xfree(host_str);
+ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reboot_agent_args);
last_node_update = now;
schedule_node_save();
@@ -2708,6 +2709,7 @@ static void *_shutdown_bu_thread(void *arg)
xfree(arg);
slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id);
slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
slurmctld_conf.control_addr[bu_inx]);
req.msg_type = REQUEST_CONTROL;
diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index 2ea87e5f13..28bda763f5 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -352,6 +352,8 @@ static int _open_controller_conn(slurmdb_cluster_rec_t *cluster, bool locked)
persist_conn->rem_port = cluster->control_port;
}
+ persist_conn->r_uid = SLURM_AUTH_UID_ANY;
+
rc = slurm_persist_conn_open(persist_conn);
if (rc != SLURM_SUCCESS) {
if (_comm_fail_log(cluster)) {
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 7e7a2fb085..8f378f72f9 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -5863,6 +5863,7 @@ _signal_batch_job(struct job_record *job_ptr, uint16_t signal, uint16_t flags)
signal_tasks_msg->signal = signal;
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -13921,8 +13922,7 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
- slurm_msg_t_init(&resp_msg);
- resp_msg.protocol_version = msg->protocol_version;
+ response_init(&resp_msg, msg);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
@@ -13932,7 +13932,6 @@ reply:
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
}
- resp_msg.conn = msg->conn;
slurm_send_node_msg(msg->conn_fd, &resp_msg);
if (resp_array_msg) {
@@ -14024,6 +14023,7 @@ static void _send_job_kill(struct job_record *job_ptr)
}
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -14424,6 +14424,7 @@ abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, char *node_name)
agent_info->msg_type = REQUEST_ABORT_JOB;
agent_info->msg_args = kill_req;
+ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_info);
}
@@ -14471,6 +14472,7 @@ extern void kill_job_on_node(struct job_record *job_ptr,
agent_info->msg_type = REQUEST_TERMINATE_JOB;
agent_info->msg_args = kill_req;
+ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_info);
}
@@ -15452,6 +15454,7 @@ static void _signal_job(struct job_record *job_ptr, int signal, uint16_t flags)
}
agent_args->msg_args = signal_job_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -15532,6 +15535,7 @@ static void _suspend_job(struct job_record *job_ptr, uint16_t op,
}
agent_args->msg_args = sus_ptr;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -15877,6 +15881,7 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid,
memset(&rc_msg, 0, sizeof(rc_msg));
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
}
return rc;
@@ -16027,6 +16032,7 @@ extern int job_suspend2(suspend_msg_t *sus_ptr, uid_t uid,
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
}
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
if (resp_array_msg) {
@@ -16759,6 +16765,7 @@ reply: FREE_NULL_LIST(top_job_list);
memset(&rc_msg, 0, sizeof(rc_msg));
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
}
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index d2e0f9376f..987bda8507 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -2746,6 +2746,7 @@ extern void launch_job(struct job_record *job_ptr)
agent_arg_ptr->hostlist = hostlist_create(launch_job_ptr->batch_host);
agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH;
agent_arg_ptr->msg_args = (void *) launch_msg_ptr;
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
/* Launch the RPC via agent */
agent_queue_request(agent_arg_ptr);
@@ -4358,6 +4359,7 @@ extern int reboot_job_nodes(struct job_record *job_ptr)
rc = SLURM_ERROR;
}
xfree(nodes);
+ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reboot_agent_args);
}
@@ -4389,6 +4391,7 @@ extern int reboot_job_nodes(struct job_record *job_ptr)
rc = SLURM_ERROR;
}
xfree(nodes);
+ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reboot_agent_args);
}
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 1a4188c365..fe0c596691 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -3688,11 +3688,11 @@ void msg_to_slurmd (slurm_msg_type_t msg_type)
xfree (kill_agent_args);
} else {
debug ("Spawning agent msg_type=%d", msg_type);
+ set_agent_arg_r_uid(kill_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(kill_agent_args);
}
}
-
/* make_node_alloc - flag specified node as allocated to a job
* IN node_ptr - pointer to node being allocated
* IN job_ptr - pointer to job that is starting
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 8ffcb242ec..1e14fb5cc0 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -657,6 +657,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout,
}
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -3265,6 +3266,7 @@ extern void launch_prolog(struct job_record *job_ptr)
select_g_step_start(build_extern_step(job_ptr));
/* Launch the RPC via agent */
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_arg_ptr);
}
@@ -4687,6 +4689,7 @@ extern void re_kill_job(struct job_record *job_ptr)
last_job_id = job_ptr->job_id;
hostlist_destroy(kill_hostlist);
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c
index 2a32fe7c19..059dc404df 100644
--- a/src/slurmctld/ping_nodes.c
+++ b/src/slurmctld/ping_nodes.c
@@ -350,6 +350,7 @@ void ping_nodes (void)
debug("Spawning ping agent for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(ping_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(ping_agent_args);
}
@@ -364,6 +365,7 @@ void ping_nodes (void)
host_str, reg_agent_args->node_count);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(reg_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reg_agent_args);
}
@@ -524,6 +526,7 @@ extern void run_health_check(void)
debug("Spawning health check agent for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(check_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(check_agent_args);
}
}
@@ -583,6 +586,7 @@ extern void update_nodes_acct_gather_data(void)
info("Updating acct_gather data for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
}
}
diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c
index 9a3cd76559..16ec332a40 100644
--- a/src/slurmctld/srun_comm.c
+++ b/src/slurmctld/srun_comm.c
@@ -57,7 +57,7 @@
*/
static void _srun_agent_launch(slurm_addr_t *addr, char *host,
slurm_msg_type_t type, void *msg_args,
- uint16_t protocol_version)
+ uid_t r_uid, uint16_t protocol_version)
{
agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t));
@@ -67,6 +67,7 @@ static void _srun_agent_launch(slurm_addr_t *addr, char *host,
agent_args->hostlist = hostlist_create(host);
agent_args->msg_type = type;
agent_args->msg_args = msg_args;
+ set_agent_arg_r_uid(agent_args, r_uid);
agent_args->protocol_version = protocol_version;
agent_queue_request(agent_args);
@@ -145,6 +146,7 @@ extern void srun_allocate(struct job_record *job_ptr)
msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL);
_srun_agent_launch(addr, job_ptr->alloc_node,
RESPONSE_RESOURCE_ALLOCATION, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
} else if (_pending_pack_jobs(job_ptr)) {
return;
@@ -168,6 +170,7 @@ extern void srun_allocate(struct job_record *job_ptr)
list_iterator_destroy(iter);
_srun_agent_launch(addr, job_ptr->alloc_node,
RESPONSE_JOB_PACK_ALLOCATION, job_resp_list,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
} else {
error("%s: Can not find pack job leader %pJ",
@@ -193,7 +196,7 @@ extern void srun_allocate_abort(struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_JOB_COMPLETE,
- msg_arg,
+ msg_arg, job_ptr->user_id,
job_ptr->start_protocol_ver);
}
}
@@ -246,7 +249,8 @@ extern void srun_node_fail(struct job_record *job_ptr, char *node_name)
msg_arg->step_id = step_ptr->step_id;
msg_arg->nodelist = xstrdup(node_name);
_srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
list_iterator_destroy(step_iterator);
@@ -258,7 +262,8 @@ extern void srun_node_fail(struct job_record *job_ptr, char *node_name)
msg_arg->step_id = NO_VAL;
msg_arg->nodelist = xstrdup(node_name);
_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
}
}
@@ -292,7 +297,7 @@ extern void srun_ping (void)
msg_arg->job_id = job_ptr->job_id;
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
- SRUN_PING, msg_arg,
+ SRUN_PING, msg_arg, job_ptr->user_id,
job_ptr->start_protocol_ver);
}
}
@@ -323,6 +328,7 @@ extern void srun_step_timeout(struct step_record *step_ptr, time_t timeout_val)
msg_arg->step_id = step_ptr->step_id;
msg_arg->timeout = timeout_val;
_srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT, msg_arg,
+ step_ptr->job_ptr->user_id,
step_ptr->start_protocol_ver);
}
@@ -349,7 +355,8 @@ extern void srun_timeout (struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
msg_arg->timeout = job_ptr->end_time;
_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
}
@@ -379,7 +386,8 @@ extern int srun_user_message(struct job_record *job_ptr, char *msg)
msg_arg->job_id = job_ptr->job_id;
msg_arg->msg = xstrdup(msg);
_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
return SLURM_SUCCESS;
} else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) {
#ifndef HAVE_FRONT_END
@@ -418,6 +426,7 @@ extern int srun_user_message(struct job_record *job_ptr, char *msg)
agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
agent_arg_ptr->msg_args = (void *) notify_msg_ptr;
/* Launch the RPC via agent */
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_arg_ptr);
return SLURM_SUCCESS;
}
@@ -445,6 +454,7 @@ extern void srun_job_complete (struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_JOB_COMPLETE, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
}
@@ -479,6 +489,7 @@ extern bool srun_job_suspend (struct job_record *job_ptr, uint16_t op)
msg_arg->op = op;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_REQUEST_SUSPEND, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
msg_sent = true;
}
@@ -502,7 +513,8 @@ extern void srun_step_complete (struct step_record *step_ptr)
msg_arg->job_id = step_ptr->job_ptr->job_id;
msg_arg->step_id = step_ptr->step_id;
_srun_agent_launch(addr, step_ptr->host, SRUN_JOB_COMPLETE,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -527,7 +539,8 @@ extern void srun_step_missing (struct step_record *step_ptr,
msg_arg->step_id = step_ptr->step_id;
msg_arg->nodelist = xstrdup(node_list);
_srun_agent_launch(addr, step_ptr->host, SRUN_STEP_MISSING,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -551,7 +564,8 @@ extern void srun_step_signal (struct step_record *step_ptr, uint16_t signal)
msg_arg->job_step_id = step_ptr->step_id;
msg_arg->signal = signal;
_srun_agent_launch(addr, step_ptr->host, SRUN_STEP_SIGNAL,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -582,7 +596,8 @@ extern void srun_exec(struct step_record *step_ptr, char **argv)
for (i=0; i<cnt ; i++)
msg_arg->argv[i] = xstrdup(argv[i]);
_srun_agent_launch(addr, step_ptr->host, SRUN_EXEC,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
} else {
error("srun_exec %pS lacks communication channel",
step_ptr);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index cd4be83091..5ba6386daa 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -724,6 +724,7 @@ void signal_step_tasks(struct step_record *step_ptr, uint16_t signal,
}
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -769,6 +770,7 @@ void signal_step_tasks_on_node(char* node_name, struct step_record *step_ptr,
signal_tasks_msg->job_step_id = step_ptr->step_id;
signal_tasks_msg->signal = signal;
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -4504,6 +4506,7 @@ static void _signal_step_timelimit(struct job_record *job_ptr,
}
agent_args->msg_args = kill_step;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index bbb41957b3..e5f3d04c5a 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -474,7 +474,7 @@ rwfail:
static int
_send_slurmstepd_init(int fd, int type, void *req,
- slurm_addr_t *cli, slurm_addr_t *self,
+ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
hostset_t step_hset, uint16_t protocol_version)
{
int len = 0;
@@ -623,6 +623,7 @@ _send_slurmstepd_init(int fd, int type, void *req,
safe_write(fd, get_buf_data(buffer), len);
free_buf(buffer);
buffer = NULL;
+ safe_write(fd, &cli_uid, sizeof(uid_t));
/* send self address over to slurmstepd */
if (self) {
@@ -696,7 +697,7 @@ rwfail:
*/
static int
_forkexec_slurmstepd(uint16_t type, void *req,
- slurm_addr_t *cli, slurm_addr_t *self,
+ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
const hostset_t step_hset, uint16_t protocol_version)
{
pid_t pid;
@@ -738,7 +739,7 @@ _forkexec_slurmstepd(uint16_t type, void *req,
error("Unable to close write to_slurmd in parent: %m");
if ((rc = _send_slurmstepd_init(to_stepd[1], type,
- req, cli, self,
+ req, cli, cli_uid, self,
step_hset,
protocol_version)) != 0) {
error("Unable to init slurmstepd");
@@ -1583,8 +1584,9 @@ _rpc_launch_tasks(slurm_msg_t *msg)
}
debug3("_rpc_launch_tasks: call to _forkexec_slurmstepd");
- errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli, &self,
- step_hset, msg->protocol_version);
+ errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli,
+ msg->auth_uid, &self, step_hset,
+ msg->protocol_version);
debug3("_rpc_launch_tasks: return from _forkexec_slurmstepd");
_launch_complete_add(req->job_id);
@@ -2231,7 +2233,7 @@ static int _spawn_prolog_stepd(slurm_msg_t *msg)
debug3("%s: call to _forkexec_slurmstepd", __func__);
rc = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)launch_req,
- cli, &self, step_hset,
+ cli, msg->auth_uid, &self, step_hset,
msg->protocol_version);
debug3("%s: return from _forkexec_slurmstepd %d",
__func__, rc);
@@ -2541,8 +2543,9 @@ _rpc_batch_job(slurm_msg_t *msg, bool new_msg)
info("Launching batch job %u for UID %u", req->job_id, req->uid);
debug3("_rpc_batch_job: call to _forkexec_slurmstepd");
- rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, NULL,
- (hostset_t)NULL, SLURM_PROTOCOL_VERSION);
+ rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli,
+ msg->auth_uid, NULL, (hostset_t)NULL,
+ SLURM_PROTOCOL_VERSION);
debug3("_rpc_batch_job: return from _forkexec_slurmstepd: %d", rc);
slurm_mutex_unlock(&launch_mutex);
@@ -4537,7 +4540,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg)
/* Following call fills in gtids and local_pids when successful. */
rc = stepd_attach(fd, protocol_version, &ioaddr,
- &resp_msg.address, job_cred_sig, resp);
+ &resp_msg.address, job_cred_sig, msg->auth_uid, resp);
if (rc != SLURM_SUCCESS) {
debug2("stepd_attach call failed");
goto done2;
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index ebd46e1e8c..76fc73df70 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -540,7 +540,11 @@ _service_connection(void *arg)
/* if this fails we need to make sure the nodes we forward
to are taken care of and sent back. This way the control
also has a better idea what happened to us */
- slurm_send_rc_msg(msg, rc);
+ if (msg->auth_uid_set)
+ slurm_send_rc_msg(msg, rc);
+ else
+ debug("%s: incomplete message", __func__);
+
goto cleanup;
}
debug2("got this type of message %d", msg->msg_type);
diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c
index 9000d05777..591eec7f45 100644
--- a/src/slurmd/slurmstepd/io.c
+++ b/src/slurmd/slurmstepd/io.c
@@ -1946,6 +1946,7 @@ _user_managed_io_connect(srun_info_t *srun, uint32_t gtid)
slurm_msg_t_init(&msg);
msg.protocol_version = srun->protocol_version;
msg.msg_type = TASK_USER_MANAGED_IO_STREAM;
+ slurm_msg_set_r_uid(&msg, srun->uid);
msg.data = &user_io_msg;
user_io_msg.task_id = gtid;
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 97f9a409e0..bbec2547ed 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -161,7 +161,7 @@ typedef struct kill_thread {
static bool _access(const char *path, int modes, uid_t uid,
int ngids, gid_t *gids);
static void _send_launch_failure(launch_tasks_request_msg_t *,
- slurm_addr_t *, int, uint16_t);
+ slurm_addr_t *, uid_t, int, uint16_t);
static int _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized);
static int _become_user(stepd_step_rec_t *job, struct priv_state *ps);
static void _set_prio_process (stepd_step_rec_t *job);
@@ -199,7 +199,8 @@ static stepd_step_rec_t *reattach_job;
*/
extern stepd_step_rec_t *
mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
- slurm_addr_t *self, uint16_t protocol_version)
+ uid_t cli_uid, slurm_addr_t *self,
+ uint16_t protocol_version)
{
stepd_step_rec_t *job = NULL;
@@ -209,7 +210,8 @@ mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
reset in _send_launch_failure.
*/
int fail = errno;
- _send_launch_failure(msg, cli, errno, protocol_version);
+ _send_launch_failure(msg, cli, cli_uid, errno,
+ protocol_version);
errno = fail;
return NULL;
}
@@ -720,6 +722,7 @@ _send_exit_msg(stepd_step_rec_t *job, uint32_t *tid, int n, int status)
/* This should always be set to something else we have a bug. */
xassert(srun->protocol_version);
resp.protocol_version = srun->protocol_version;
+ slurm_msg_set_r_uid(&resp, srun->uid);
if (_send_srun_resp_msg(&resp, job->nnodes) != SLURM_SUCCESS)
error("Failed to send MESSAGE_TASK_EXIT: %m");
@@ -819,6 +822,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last)
}
/*********************************************/
slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id);
req.msg_type = REQUEST_STEP_COMPLETE;
req.data = &msg;
req.address = step_complete.parent_addr;
@@ -2428,8 +2432,8 @@ extern int stepd_drain_node(char *reason)
}
static void
-_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc,
- uint16_t protocol_version)
+_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
+ uid_t cli_uid, int rc, uint16_t protocol_version)
{
slurm_msg_t resp_msg;
launch_tasks_response_msg_t resp;
@@ -2465,6 +2469,7 @@ _send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc,
resp_msg.data = &resp;
resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
resp_msg.protocol_version = protocol_version;
+ slurm_msg_set_r_uid(&resp_msg, cli_uid);
resp.job_id = msg->job_id;
resp.step_id = msg->job_step_id;
@@ -2493,6 +2498,7 @@ _send_launch_resp(stepd_step_rec_t *job, int rc)
slurm_msg_t_init(&resp_msg);
resp_msg.address = srun->resp_addr;
+ slurm_msg_set_r_uid(&resp_msg, srun->uid);
resp_msg.protocol_version = srun->protocol_version;
resp_msg.data = &resp;
resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
diff --git a/src/slurmd/slurmstepd/mgr.h b/src/slurmd/slurmstepd/mgr.h
index 5c2d62c2a5..4b5808661a 100644
--- a/src/slurmd/slurmstepd/mgr.h
+++ b/src/slurmd/slurmstepd/mgr.h
@@ -53,7 +53,7 @@ void batch_finish(stepd_step_rec_t *job, int rc);
* Initialize a stepd_step_rec_t structure for a launch tasks
*/
stepd_step_rec_t *mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg,
- slurm_addr_t *client,
+ slurm_addr_t *cli, uid_t cli_uid,
slurm_addr_t *self,
uint16_t protocol_version);
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 0d27f9ec9a..72c5cf3159 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -1060,6 +1060,7 @@ _handle_attach(int fd, stepd_step_rec_t *job, uid_t uid)
safe_read(fd, &srun->ioaddr, sizeof(slurm_addr_t));
safe_read(fd, &srun->resp_addr, sizeof(slurm_addr_t));
safe_read(fd, srun->key, SLURM_IO_KEY_SIZE);
+ safe_read(fd, &srun->uid, sizeof(uid_t));
safe_read(fd, &srun->protocol_version, sizeof(uint16_t));
if (!srun->protocol_version)
diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c
index 7a082178c8..7878c1841c 100644
--- a/src/slurmd/slurmstepd/slurmstepd.c
+++ b/src/slurmd/slurmstepd/slurmstepd.c
@@ -73,15 +73,16 @@
#include "src/slurmd/slurmstepd/slurmstepd.h"
#include "src/slurmd/slurmstepd/slurmstepd_job.h"
-static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli,
+static int _init_from_slurmd(int sock, char **argv,
+ slurm_addr_t **_cli, uid_t *_cli_uid,
slurm_addr_t **_self, slurm_msg_t **_msg);
static void _dump_user_env(void);
static void _send_ok_to_slurmd(int sock);
static void _send_fail_to_slurmd(int sock);
static void _got_ack_from_slurmd(int);
-static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, slurm_addr_t *self,
- slurm_msg_t *msg);
+static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, uid_t cli_uid,
+ slurm_addr_t *self, slurm_msg_t *msg);
#ifdef MEMORY_LEAK_DEBUG
static void _step_cleanup(stepd_step_rec_t *job, slurm_msg_t *msg, int rc);
#endif
@@ -104,6 +105,7 @@ int
main (int argc, char **argv)
{
slurm_addr_t *cli;
+ uid_t cli_uid;
slurm_addr_t *self;
slurm_msg_t *msg;
stepd_step_rec_t *job;
@@ -124,11 +126,11 @@ main (int argc, char **argv)
fatal( "failed to initialize authentication plugin" );
/* Receive job parameters from the slurmd */
- _init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg);
+ _init_from_slurmd(STDIN_FILENO, argv, &cli, &cli_uid, &self, &msg);
/* Create the stepd_step_rec_t, mostly from info in a
* launch_tasks_request_msg_t or a batch_job_launch_msg_t */
- if (!(job = _step_setup(cli, self, msg))) {
+ if (!(job = _step_setup(cli, cli_uid, self, msg))) {
_send_fail_to_slurmd(STDOUT_FILENO);
rc = SLURM_FAILURE;
goto ending;
@@ -467,7 +469,8 @@ static void _set_job_log_prefix(uint32_t jobid, uint32_t stepid)
*/
static int
_init_from_slurmd(int sock, char **argv,
- slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg)
+ slurm_addr_t **_cli, uid_t *_cli_uid, slurm_addr_t **_self,
+ slurm_msg_t **_msg)
{
char *incoming_buffer = NULL;
Buf buffer;
@@ -475,6 +478,7 @@ _init_from_slurmd(int sock, char **argv,
int len;
uint16_t proto;
slurm_addr_t *cli = NULL;
+ uid_t cli_uid;
slurm_addr_t *self = NULL;
slurm_msg_t *msg = NULL;
uint16_t port;
@@ -555,6 +559,7 @@ _init_from_slurmd(int sock, char **argv,
if (slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR)
fatal("slurmstepd: problem with unpack of slurmd_conf");
free_buf(buffer);
+ safe_read(sock, &cli_uid, sizeof(uid_t));
/* receive self from slurmd */
safe_read(sock, &len, sizeof(int));
@@ -640,6 +645,7 @@ _init_from_slurmd(int sock, char **argv,
msg->protocol_version = proto;
*_cli = cli;
+ *_cli_uid = cli_uid;
*_self = self;
*_msg = msg;
@@ -651,7 +657,8 @@ rwfail:
}
static stepd_step_rec_t *
-_step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
+_step_setup(slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
+ slurm_msg_t *msg)
{
stepd_step_rec_t *job = NULL;
@@ -662,7 +669,7 @@ _step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
break;
case REQUEST_LAUNCH_TASKS:
debug2("setup for a launch_task");
- job = mgr_launch_tasks_setup(msg->data, cli, self,
+ job = mgr_launch_tasks_setup(msg->data, cli, cli_uid, self,
msg->protocol_version);
break;
default:
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index 4e58e5fca0..9c758ade26 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -378,7 +378,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
memset(&io_addr, 0, sizeof(slurm_addr_t));
}
- srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
+ srun = srun_info_create(msg->cred, &resp_addr, &io_addr, job->uid,
protocol_version);
job->profile = msg->profile;
@@ -550,7 +550,7 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
get_cred_gres(msg->cred, conf->node_name,
&job->job_gres_list, &job->step_gres_list);
- srun = srun_info_create(NULL, NULL, NULL, NO_VAL16);
+ srun = srun_info_create(NULL, NULL, NULL, job->uid, NO_VAL16);
list_append(job->sruns, (void *) srun);
@@ -636,7 +636,7 @@ stepd_step_rec_destroy(stepd_step_rec_t *job)
extern srun_info_t *
srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
- slurm_addr_t *ioaddr, uint16_t protocol_version)
+ slurm_addr_t *ioaddr, uid_t uid, uint16_t protocol_version)
{
char *data = NULL;
uint32_t len = 0;
@@ -647,6 +647,7 @@ srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
if (!protocol_version || (protocol_version == NO_VAL16))
protocol_version = SLURM_PROTOCOL_VERSION;
srun->protocol_version = protocol_version;
+ srun->uid = uid;
/*
* If no credential was provided, return the empty
* srun info object. (This is used, for example, when
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h
index a8ca8daf7e..9bf4956223 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.h
+++ b/src/slurmd/slurmstepd/slurmstepd_job.h
@@ -68,6 +68,7 @@ typedef struct {
slurm_addr_t ioaddr; /* Address to connect on for normal I/O.
Spawn IO uses messages to the normal
resp_addr. */
+ uid_t uid; /* user id for job */
uint16_t protocol_version; /* protocol_version of the srun */
} srun_info_t;
@@ -256,7 +257,8 @@ stepd_step_rec_t * batch_stepd_step_rec_create(batch_job_launch_msg_t *msg);
void stepd_step_rec_destroy(stepd_step_rec_t *job);
srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *respaddr,
- slurm_addr_t *ioaddr, uint16_t protocol_version);
+ slurm_addr_t *ioaddr, uid_t uid,
+ uint16_t protocol_version);
void srun_info_destroy(srun_info_t *srun);
diff --git a/src/slurmd/slurmstepd/x11_forwarding.c b/src/slurmd/slurmstepd/x11_forwarding.c
index 1cb3a3407f..14665ffa17 100644
--- a/src/slurmd/slurmstepd/x11_forwarding.c
+++ b/src/slurmd/slurmstepd/x11_forwarding.c
@@ -84,6 +84,8 @@ static int x11_display = 0;
void *_handle_channel(void *x);
void *_keepalive_engine(void *x);
void *_accept_engine(void *x);
+/* Target UID */
+static uid_t job_uid;
/*
* libssh2 has some quirks with the mixed use of blocking vs. non-blocking
@@ -207,6 +209,8 @@ extern int setup_x11_forward(stepd_step_rec_t *job, int *display,
xsignal(SIGTERM, _shutdown_x11);
xsignal_unblock(sig_array);
+ job_uid = job->uid;
+
debug("X11Parameters: %s", conf->x11_params);
if (!(home = _get_home(job->uid))) {
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index 621b8576eb..e569b89b90 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -626,6 +626,7 @@ extern int read_slurmdbd_conf(void)
if (!slurmdbd_conf->purge_usage)
slurmdbd_conf->purge_usage = NO_VAL;
+ slurm_conf.last_update = time(NULL);
slurm_mutex_unlock(&conf_mutex);
return SLURM_SUCCESS;
}
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index 471c724da5..ac876c3988 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -818,6 +818,7 @@ static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
} else {
slurm_msg_t out_msg;
slurm_msg_t_init(&out_msg);
+ slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY);
out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
out_msg.protocol_version = cluster_rec->rpc_version;
--
2.35.3
From e02079f107d52210b6e68c3c551a93998275a578 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:39:56 +0200
Subject: [PATCH 02/30] Convert slurm_conf.slurmd_user_id ->
slurm_get_slurmd_user_id()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/api/signal.c | 4 ++--
src/common/slurm_protocol_api.c | 2 +-
src/plugins/mpi/pmix/pmixp_dconn.c | 2 +-
src/plugins/mpi/pmix/pmixp_server.c | 2 +-
src/plugins/mpi/pmix/pmixp_utils.c | 2 +-
src/slurmd/slurmstepd/mgr.c | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/api/signal.c b/src/api/signal.c
index a036cf0190..caf04a728e 100644
--- a/src/api/signal.c
+++ b/src/api/signal.c
@@ -102,7 +102,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t
rpc.flags = KILL_JOB_BATCH;
slurm_msg_t_init(&msg);
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
msg.msg_type = REQUEST_SIGNAL_TASKS;
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
@@ -160,7 +160,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t
slurm_msg_t_init(&msg);
msg.msg_type = REQUEST_TERMINATE_TASKS;
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index f9ea5e5a05..117cb1afd5 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4407,7 +4407,7 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg,
if (!msg->auth_uid_set)
slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY);
else if ((msg->auth_uid != slurm_conf.slurm_user_id) &&
- (msg->auth_uid != slurm_conf.slurmd_user_id))
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
else
slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY);
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c
index 594230bfc1..6a1fb42732 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.c
+++ b/src/plugins/mpi/pmix/pmixp_dconn.c
@@ -79,7 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
_pmixp_dconn_conns[i].nodeid = i;
_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
- _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id;
+ _pmixp_dconn_conns[i].uid = slurm_get_slurmd_user_id();
}
return SLURM_SUCCESS;
}
diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c
index 944ab865ae..b6bc4ae8fd 100644
--- a/src/plugins/mpi/pmix/pmixp_server.c
+++ b/src/plugins/mpi/pmix/pmixp_server.c
@@ -543,7 +543,7 @@ static int _auth_cred_verify(Buf buf, uid_t *uid)
} else {
uid_t auth_uid;
auth_uid = g_slurm_auth_get_uid(auth_cred);
- if ((auth_uid != slurm_conf.slurmd_user_id) &&
+ if ((auth_uid != slurm_get_slurmd_user_id()) &&
(auth_uid != _pmixp_job_info.uid)) {
PMIXP_ERROR("Credential from uid %u", auth_uid);
rc = SLURM_ERROR;
diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c
index bb89fc1133..ea1bd5e337 100644
--- a/src/plugins/mpi/pmix/pmixp_utils.c
+++ b/src/plugins/mpi/pmix/pmixp_utils.c
@@ -404,7 +404,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address,
msg.forward.timeout = timeout;
msg.forward.cnt = 0;
msg.forward.nodelist = NULL;
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout);
if (!ret_list) {
/* This should never happen (when this was
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index bbec2547ed..203f92e668 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -822,7 +822,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last)
}
/*********************************************/
slurm_msg_t_init(&req);
- slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&req, slurm_get_slurmd_user_id());
req.msg_type = REQUEST_STEP_COMPLETE;
req.data = &msg;
req.address = step_complete.parent_addr;
--
2.35.3
From ce15aa4a3d8366b1833b25bf6b3fa4b82c16748f Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:44:04 +0200
Subject: [PATCH 03/30] Convert slurm_conf.slurm_user_id ->
slurm_get_slurm_user_id()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/api/reconfigure.c | 2 +-
src/common/slurm_protocol_api.c | 4 ++--
src/slurmctld/backup.c | 2 +-
src/slurmctld/controller.c | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c
index f4a49f9a81..5dad0c79c7 100644
--- a/src/api/reconfigure.c
+++ b/src/api/reconfigure.c
@@ -157,7 +157,7 @@ static int _send_message_controller(int dest, slurm_msg_t *req)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
}
- slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(req, slurm_get_slurm_user_id());
if (slurm_send_node_msg(fd, req) < 0) {
close(fd);
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 117cb1afd5..c452771fe1 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4406,7 +4406,7 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg,
*/
if (!msg->auth_uid_set)
slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY);
- else if ((msg->auth_uid != slurm_conf.slurm_user_id) &&
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
(msg->auth_uid != slurm_get_slurmd_user_id()))
slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
else
@@ -4840,7 +4840,7 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req,
goto cleanup;
}
- slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(req, slurm_get_slurm_user_id());
if ((rc = slurm_send_node_msg(fd, req)) < 0) {
rc = SLURM_ERROR;
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index e9ba2849e9..8b9df50184 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -633,7 +633,7 @@ static void *_shutdown_controller(void *arg)
xfree(arg);
slurm_msg_t_init(&req);
- slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(&req, slurm_get_slurm_user_id());
slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
slurmctld_conf.control_addr[shutdown_inx]);
req.msg_type = REQUEST_CONTROL;
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 20a7585f34..3aad536940 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -2709,7 +2709,7 @@ static void *_shutdown_bu_thread(void *arg)
xfree(arg);
slurm_msg_t_init(&req);
- slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(&req, slurm_get_slurm_user_id());
slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
slurmctld_conf.control_addr[bu_inx]);
req.msg_type = REQUEST_CONTROL;
--
2.35.3
From d64429652e6f601fb14ddab7d334510a2d3a6db0 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:46:37 +0200
Subject: [PATCH 04/30] Convert slurm_conf.last_update ->
slurmctld_conf.last_update
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 ++--
src/slurmdbd/read_config.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index c452771fe1..9b377c517f 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -915,10 +915,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
static time_t config_update = (time_t) -1;
static bool block_null_hash = true;
- if (config_update != slurm_conf.last_update) {
+ if (config_update != slurmctld_conf.last_update) {
block_null_hash = (xstrcasestr(slurm_conf.comm_params,
"block_null_hash"));
- config_update = slurm_conf.last_update;
+ config_update = slurmctld_conf.last_update;
}
rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len);
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index e569b89b90..6b41039a2b 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -626,7 +626,7 @@ extern int read_slurmdbd_conf(void)
if (!slurmdbd_conf->purge_usage)
slurmdbd_conf->purge_usage = NO_VAL;
- slurm_conf.last_update = time(NULL);
+ slurmctld_conf.last_update = time(NULL);
slurm_mutex_unlock(&conf_mutex);
return SLURM_SUCCESS;
}
--
2.35.3
From 385e71eddab8360dc956f26f672d98a2da749f2b Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:54:56 +0200
Subject: [PATCH 05/30] Convert log_flag_hex() to local _print_data()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 9b377c517f..b4db0e47be 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3716,7 +3716,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
goto total_return;
}
- log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__);
+#if _DEBUG
+ _print_data (buf, buflen);
+#endif
buffer = create_buf(buf, buflen);
if (unpack_header(&header, buffer) == SLURM_ERROR) {
--
2.35.3
From 36cd787327673303cd730766e28b306617c0e4f1 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:57:09 +0200
Subject: [PATCH 06/30] Convert slurm_conf.comm_params ->
slurm_get_comm_parameters()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index b4db0e47be..82a1577810 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -916,8 +916,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
static bool block_null_hash = true;
if (config_update != slurmctld_conf.last_update) {
- block_null_hash = (xstrcasestr(slurm_conf.comm_params,
+ char * comm_parameters = slurm_get_comm_parameters();
+ block_null_hash = (xstrcasestr(comm_parameters,
"block_null_hash"));
+ xfree(comm_parameters);
config_update = slurmctld_conf.last_update;
}
--
2.35.3
From 864a80adfd35c648608b1e1f73a03c6f9771a986 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:58:46 +0200
Subject: [PATCH 07/30] Convert slurm_conf.msg_timeout ->
slurm_get_msg_timeout()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 82a1577810..b6a36fe918 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3681,12 +3681,12 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
if (timeout <= 0) {
/* convert secs to msec */
- timeout = slurm_conf.msg_timeout * 1000;
+ timeout = slurm_get_msg_timeout() * 1000;
orig_timeout = timeout;
}
if (steps) {
if (message_timeout < 0)
- message_timeout = slurm_conf.msg_timeout * 1000;
+ message_timeout = slurm_get_msg_timeout() * 1000;
orig_timeout = (timeout -
(message_timeout*(steps-1)))/steps;
steps--;
@@ -3697,9 +3697,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
/* we compare to the orig_timeout here because that is really
* what we are going to wait for each step
*/
- if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) {
+ if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
- __func__, (slurm_conf.msg_timeout * 10),
+ __func__, (slurm_get_msg_timeout() * 10),
(timeout/1000));
} else if (orig_timeout < 1000) {
log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
--
2.35.3
From 9af1830c7a048612edca8b4165fc4a48329aeae1 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 08:36:38 +0200
Subject: [PATCH 08/30] Fix g_slurm_auth_create in _pack_composite_msg
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_pack.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 9237870f0d..424c7e619e 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -5149,7 +5149,8 @@ _pack_composite_msg(composite_msg_t *msg, Buf buffer, uint16_t protocol_version)
/* FIXME: this should handle the
* _global_auth_key() as well. */
tmp_info->auth_cred =
- g_slurm_auth_create(auth_info);
+ g_slurm_auth_create(auth_info,
+ tmp_info->restrict_uid, NULL, 0);
xfree(auth_info);
}
--
2.35.3
From 098cf5105c3a9e90181c27e326e016ceb696a2bb Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 17:49:40 +0200
Subject: [PATCH 09/30] Add set_agent_arg_r_uid() call to
_xmit_new_end_time()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_mgr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 8f378f72f9..dae0b64af3 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -14766,6 +14766,7 @@ _xmit_new_end_time(struct job_record *job_ptr)
#endif
agent_args->msg_args = job_time_msg_ptr;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
--
2.35.3
From 531a35967961385a4f8b243f19cb2385ee754a50 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 19:41:13 +0200
Subject: [PATCH 10/30] Disable Message Aggregation
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmd/slurmd/slurmd.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 76fc73df70..858ee0fd4f 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -180,7 +180,9 @@ static void _decrement_thd_count(void);
static void _destroy_conf(void);
static int _drain_node(char *reason);
static void _fill_registration_msg(slurm_node_registration_status_msg_t *);
+#if 0
static uint64_t _get_int(const char *my_str);
+#endif
static void _handle_connection(int fd, slurm_addr_t *client);
static void _hup_handler(int);
static void _increment_thd_count(void);
@@ -2104,6 +2106,7 @@ static int _set_topo_info(void)
return rc;
}
+#if 0
static uint64_t _get_int(const char *my_str)
{
char *end = NULL;
@@ -2116,23 +2119,33 @@ static uint64_t _get_int(const char *my_str)
return NO_VAL;
return value;
}
+#endif
static uint64_t _parse_msg_aggr_params(int type, char *params)
{
uint64_t value = NO_VAL;
+#if 0
char *sub_str = NULL;
-
+#endif
if (!params)
return NO_VAL;
switch (type) {
case WINDOW_TIME:
+ info("Message aggregation has been disabled, "
+ "please check SLE release notes!");
+#if 0
if ((sub_str = xstrcasestr(params, "WindowTime=")))
value = _get_int(sub_str + 11);
+#endif
break;
case WINDOW_MSGS:
+ info("Message aggregation has been disabled, "
+ "please check SLE release notes!");
+#if 0
if ((sub_str = xstrcasestr(params, "WindowMsgs=")))
value = _get_int(sub_str + 11);
+#endif
break;
default:
fatal("invalid message aggregation parameters: %s", params);
--
2.35.3
From 7a51b172b6b245ae5ff4c43efdb1ab3fbfb3492e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Fri, 13 May 2022 09:01:15 +0200
Subject: [PATCH 11/30] Add missing slurm_msg_set_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmd/slurmstepd/mgr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 203f92e668..8e5ece5d8f 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -2571,6 +2571,7 @@ _send_complete_batch_script_msg(stepd_step_rec_t *job, int err, int status)
slurm_set_addr_char(&req_msg.address,
conf->port, conf->hostname);
}
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
msg_rc = slurm_send_recv_rc_msg_only_one(&req_msg,
&rc, 0);
}
--
2.35.3
From c69493cfa27687e17b77f1f93af71b435f8f7ad6 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Fri, 13 May 2022 20:16:05 +0200
Subject: [PATCH 12/30] Fix g_slurm_auth_create in _stepd_connect_legacy
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/stepd_api.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 8f68a09e28..2aa35e7445 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -250,7 +250,8 @@ stepd_connect(const char *directory, const char *nodename,
buffer = init_buf(0);
/* Create an auth credential */
auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ slurm_get_slurmd_user_id(), NULL, 0);
xfree(auth_info);
if (auth_cred == NULL) {
error("Creating authentication credential: %s",
--
2.35.3
From b3af841f47cb8c7c769f5099cfcbf1590d073872 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Sat, 14 May 2022 09:23:17 +0200
Subject: [PATCH 13/30] Add missing auth_info arguments
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index b6a36fe918..40c07b8e2b 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -906,7 +906,7 @@ char *slurm_get_priority_weight_tres(void)
return weights;
}
-static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
+static int _check_hash(struct slurm_buf *buffer, header_t *header, slurm_msg_t *msg,
void *cred)
{
char *cred_hash = NULL;
@@ -3268,6 +3268,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
header_t header;
int rc;
void *auth_cred = NULL;
+ char *auth_info;
if (unpack_header(&header, buffer) == SLURM_ERROR) {
rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
@@ -3317,7 +3318,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3331,7 +3332,9 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
goto total_return;
}
- msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg->auth_uid_set = true;
/*
@@ -3490,6 +3493,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
ret_data_info_t *ret_data_info = NULL;
List ret_list = NULL;
int orig_timeout = timeout;
+ char *auth_info;
xassert(fd >= 0);
@@ -3595,7 +3599,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3609,7 +3613,9 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
goto total_return;
}
- msg.auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg.auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg.auth_uid_set = true;
/*
* Unpack message body
@@ -3848,6 +3854,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
int rc;
void *auth_cred = NULL;
Buf buffer;
+ char *auth_info;
xassert(fd >= 0);
@@ -3983,7 +3990,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3997,7 +4004,9 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
goto total_return;
}
- msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg->auth_uid_set = true;
/*
--
2.35.3
From 4aa7abec3c47f1683b1d95296a051abe15543424 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:04:00 +0200
Subject: [PATCH 14/30] Fix argument in call to slurm_get_plugin_hash_enable
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_auth.c | 4 ++--
src/common/slurm_auth.h | 2 +-
src/common/slurm_protocol_api.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index 556da92ad0..bd99b25254 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -131,12 +131,12 @@ slurm_auth_generic_errstr( int slurm_errno )
}
}
-extern bool slurm_get_plugin_hash_enable(int index)
+extern bool slurm_get_plugin_hash_enable()
{
if (slurm_auth_init(NULL) < 0)
return true;
- return *(ops[index].hash_enable);
+ return *(ops.hash_enable);
}
diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h
index d82cdf8775..7297efd203 100644
--- a/src/common/slurm_auth.h
+++ b/src/common/slurm_auth.h
@@ -138,7 +138,7 @@ extern int slurm_auth_fini( void );
* Check if plugin type corresponding to the authentication
* plugin index supports hash.
*/
-extern bool slurm_get_plugin_hash_enable(int index);
+extern bool slurm_get_plugin_hash_enable();
/*
* Static bindings for the global authentication context.
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 40c07b8e2b..95140eed67 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -931,7 +931,7 @@ static int _check_hash(struct slurm_buf *buffer, header_t *header, slurm_msg_t *
&msg->msg_type, sizeof(msg->msg_type)))
rc = SLURM_ERROR;
} else if (block_null_hash &&
- slurm_get_plugin_hash_enable(msg->auth_index))
+ slurm_get_plugin_hash_enable())
rc = SLURM_ERROR;
xfree(cred_hash);
--
2.35.3
From 3990c22bf868d689c27f735b850c0f53aa098cd6 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:04:48 +0200
Subject: [PATCH 15/30] Fix buffer type
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 95140eed67..0b8ff24168 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3675,7 +3675,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
int rc;
void *auth_cred = NULL;
slurm_msg_t msg;
- buf_t *buffer;
+ Buf buffer;
ret_data_info_t *ret_data_info = NULL;
List ret_list = NULL;
int orig_timeout = timeout;
--
2.35.3
From 1b5d1b3a5d88b5e159b0dc5887c651532dc64f51 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:06:49 +0200
Subject: [PATCH 16/30] Fix arguments to forward_init()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 0b8ff24168..9d2226b140 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3719,7 +3719,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
* the message.
*/
if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
- forward_init(&header.forward);
+ forward_init(&header.forward, NULL);
rc = errno;
goto total_return;
}
--
2.35.3
From ac03190376ec8ffc4ffdb6b61e39477d4fdb4cd9 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:07:20 +0200
Subject: [PATCH 17/30] Fix log message function
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 9d2226b140..70d394965b 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3698,17 +3698,17 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
steps--;
}
- log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d",
+ debug("%s: orig_timeout was %d we have %d steps and a timeout of %d",
__func__, orig_timeout, steps, timeout);
/* we compare to the orig_timeout here because that is really
* what we are going to wait for each step
*/
if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
- log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
+ debug("%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
__func__, (slurm_get_msg_timeout() * 10),
(timeout/1000));
} else if (orig_timeout < 1000) {
- log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
+ debug("%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
__func__, timeout, orig_timeout);
}
--
2.35.3
From 10702de5daeabbd3a8c01d8a736b5baeeee5058d Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:26:13 +0200
Subject: [PATCH 18/30] Fix arguments in g_slurm_auth_unpack()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 70d394965b..1e17d301ab 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3765,7 +3765,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
__func__);
}
- if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) {
+ if (!(auth_cred = g_slurm_auth_unpack(buffer))) {
error("%s: auth_g_unpack: %m", __func__);
free_buf(buffer);
rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
--
2.35.3
From b6f0b34a4999c2285e151a6eed6912a97dfb7c8f Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:12:08 +0200
Subject: [PATCH 19/30] Fix auth_g_get_data()
Remove the non-existent wrapper.
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_auth.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index bd99b25254..786987fe53 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -224,12 +224,10 @@ int g_slurm_auth_verify(void *cred, char *auth_info)
int auth_g_get_data(void *cred, char **data, uint32_t *len)
{
- cred_wrapper_t *wrap = (cred_wrapper_t *) cred;
-
- if (!wrap || slurm_auth_init(NULL) < 0)
+ if (slurm_auth_init(NULL) < 0)
return SLURM_ERROR;
- return (*(ops[wrap->index].get_data))(cred, data, len);
+ return (*(ops.get_data))(cred, data, len);
}
uid_t g_slurm_auth_get_uid(void *cred, char *auth_info)
--
2.35.3
From 36ae179ce22f32870565c1846a08505bfe730ab6 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:18:26 +0200
Subject: [PATCH 20/30] Add support for ESLURM_AUTH_BADARG
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm/slurm_errno.h | 1 +
src/common/slurm_errno.c | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h
index 7b0c2a1ffc..e0907878e3 100644
--- a/slurm/slurm_errno.h
+++ b/slurm/slurm_errno.h
@@ -270,6 +270,7 @@ enum {
ESLURM_AUTH_FOPEN_ERROR,
ESLURM_AUTH_NET_ERROR,
ESLURM_AUTH_UNABLE_TO_SIGN,
+ ESLURM_AUTH_BADARG,
/* accounting errors */
ESLURM_DB_CONNECTION = 7000,
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index 83e91c09bf..f4568055be 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -460,7 +460,8 @@ static slurm_errtab_t slurm_errtab[] = {
"Failed to open authentication public key" },
{ ESLURM_AUTH_NET_ERROR,
"Failed to connect to authentication agent" },
-
+ { ESLURM_AUTH_BADARG,
+ "Bad argument to plugin function" },
/* accounting errors */
{ ESLURM_DB_CONNECTION,
"Unable to connect to database" },
--
2.35.3
From d95e4440f8bff6ed3f9d2a910ae0827a77dc86bb Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:41:08 +0200
Subject: [PATCH 21/30] Replace response_init() by slurm_msg_t_init()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_mgr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index dae0b64af3..869be5b28f 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -13922,7 +13922,7 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
- response_init(&resp_msg, msg);
+ slurm_msg_t_init(&resp_msg);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
--
2.35.3
From 6e01919b9075ec95c5726e3405f2be7760f4562c Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 14:33:14 +0200
Subject: [PATCH 22/30] Replace response_init()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_mgr.c | 4 ++
src/slurmctld/proc_req.c | 82 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 869be5b28f..6070b55f9f 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -13923,6 +13923,8 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
slurm_msg_t_init(&resp_msg);
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
@@ -16478,6 +16480,8 @@ extern int job_requeue2(uid_t uid, requeue_msg_t *req_ptr, slurm_msg_t *msg,
reply:
if (msg) {
slurm_msg_t_init(&resp_msg);
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
resp_msg.protocol_version = msg->protocol_version;
resp_msg.conn = msg->conn;
if (resp_array) {
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 3362e53572..5b0a350201 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1537,6 +1537,8 @@ static void _slurm_rpc_allocate_pack(slurm_msg_t * msg)
response_msg.conn = msg->conn;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_JOB_PACK_ALLOCATION;
response_msg.data = resp;
@@ -1756,6 +1758,8 @@ send_msg:
response_msg.conn = msg->conn;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_RESOURCE_ALLOCATION;
response_msg.data = alloc_msg;
@@ -1834,6 +1838,8 @@ static void _slurm_rpc_dump_conf(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BUILD_INFO;
@@ -1889,6 +1895,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -1931,6 +1939,8 @@ static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -1975,6 +1985,8 @@ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -2004,6 +2016,8 @@ static void _slurm_rpc_get_shares(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SHARE_INFO;
@@ -2033,6 +2047,8 @@ static void _slurm_rpc_get_priority_factors(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_PRIORITY_FACTORS;
@@ -2071,6 +2087,8 @@ static void _slurm_rpc_end_time(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = SRUN_TIMEOUT;
@@ -2099,6 +2117,8 @@ static void _slurm_rpc_get_fed(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_FED_INFO;
@@ -2147,6 +2167,8 @@ static void _slurm_rpc_dump_front_end(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_FRONT_END_INFO;
@@ -2207,6 +2229,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_NODE_INFO;
@@ -2265,6 +2289,8 @@ static void _slurm_rpc_dump_node_single(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_NODE_INFO;
@@ -2318,6 +2344,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_PARTITION_INFO;
@@ -2802,6 +2830,8 @@ static void _slurm_rpc_dump_batch_script(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BATCH_SCRIPT;
@@ -2932,6 +2962,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg)
slurm_msg_t_init(&resp);
resp.flags = msg->flags;
resp.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp, msg->auth_uid);
resp.address = msg->address;
resp.conn = msg->conn;
resp.msg_type = RESPONSE_JOB_STEP_CREATE;
@@ -3001,6 +3033,8 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_STEP_INFO;
@@ -3108,6 +3142,8 @@ send_reply:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_WILL_RUN;
@@ -3309,6 +3345,8 @@ static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg)
response_msg.data = job_info_resp_msg;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
@@ -3413,6 +3451,8 @@ static void _slurm_rpc_job_pack_alloc_info(slurm_msg_t * msg)
response_msg.data = resp;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
FREE_NULL_LIST(resp);
}
@@ -3655,6 +3695,8 @@ static void _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_JOB_SBCAST_CRED;
response_msg.data = &job_info_resp_msg;
@@ -4031,6 +4073,8 @@ static void _slurm_rpc_step_layout(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_STEP_LAYOUT;
response_msg.data = step_layout;
@@ -4209,6 +4253,8 @@ send_msg:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB;
response_msg.data = &submit_msg;
@@ -4506,6 +4552,8 @@ send_msg:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB;
response_msg.data = &submit_msg;
@@ -5017,6 +5065,8 @@ static void _slurm_rpc_resv_create(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
memset(&resv_resp_msg, 0, sizeof(resv_resp_msg));
resv_resp_msg.name = resv_desc_ptr->name;
response_msg.msg_type = RESPONSE_CREATE_RESERVATION;
@@ -5148,6 +5198,8 @@ static void _slurm_rpc_resv_show(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_RESERVATION_INFO;
@@ -5204,6 +5256,8 @@ static void _slurm_rpc_layout_show(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_LAYOUT_INFO;
@@ -5243,6 +5297,8 @@ static void _slurm_rpc_job_ready(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
@@ -5306,6 +5362,8 @@ static void _slurm_rpc_burst_buffer_info(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BURST_BUFFER_INFO;
@@ -5704,6 +5762,8 @@ inline static void _slurm_rpc_trigger_get(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_TRIGGER_GET;
@@ -5798,6 +5858,8 @@ inline static void _slurm_rpc_get_topo(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_TOPO_INFO;
@@ -5830,6 +5892,8 @@ inline static void _slurm_rpc_get_powercap(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_POWERCAP_INFO;
@@ -6324,6 +6388,8 @@ inline static void _slurm_rpc_dump_spank(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONCE_SPANK_ENVIRONMENT;
@@ -6417,6 +6483,8 @@ inline static void _slurm_rpc_burst_buffer_status(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BURST_BUFFER_STATUS;
@@ -6438,6 +6506,8 @@ inline static void _slurm_rpc_control_status(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_CONTROL_STATUS;
@@ -6473,6 +6543,8 @@ inline static void _slurm_rpc_dump_stats(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_STATS_INFO;
@@ -6536,6 +6608,8 @@ _slurm_rpc_dump_licenses(slurm_msg_t * msg)
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_LICENSE_INFO;
@@ -6751,6 +6825,8 @@ static void _slurm_rpc_composite_msg(slurm_msg_t *msg)
slurm_msg_t_init(&resp_msg);
resp_msg.flags = msg->flags;
resp_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
memcpy(&resp_msg.address, &comp_msg->sender,
sizeof(slurm_addr_t));
resp_msg.msg_type = RESPONSE_MESSAGE_COMPOSITE;
@@ -6836,6 +6912,8 @@ static void _slurm_rpc_comp_msg_list(composite_msg_t * comp_msg,
resp_msg->flags = next_msg->flags;
resp_msg->protocol_version =
next_msg->protocol_version;
+ if (next_msg->auth_uid_set)
+ slurm_msg_set_r_uid(resp_msg, next_msg->auth_uid);
resp_msg->msg_type = RESPONSE_MESSAGE_COMPOSITE;
/* You can't just set the
* resp_msg->address here, it won't
@@ -6911,6 +6989,8 @@ static void _slurm_rpc_assoc_mgr_info(slurm_msg_t * msg)
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_ASSOC_MGR_INFO;
@@ -7180,6 +7260,8 @@ static void _proc_multi_msg(uint32_t rpc_uid, slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_CTLD_MULT_MSG;
--
2.35.3
From f473ec8ed79c23b7d1f553052cd13c4573431c50 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 14:33:47 +0200
Subject: [PATCH 23/30] Add missing auth_uid init setting
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/proc_req.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 5b0a350201..173898ca59 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -7021,6 +7021,9 @@ static int _process_persist_conn(void *arg,
slurm_msg_t_init(&msg);
msg.auth_cred = persist_conn->auth_cred;
+ msg.auth_uid = *uid;
+ msg.auth_uid_set = true;
+
msg.conn = persist_conn;
msg.conn_fd = persist_conn->fd;
--
2.35.3
From 31a1137586a8ed223ef9db7b9f9c9c2838f5c7bc Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 15:09:34 +0200
Subject: [PATCH 24/30] Add missing set_agent_arg_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/plugins/checkpoint/blcr/checkpoint_blcr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/plugins/checkpoint/blcr/checkpoint_blcr.c b/src/plugins/checkpoint/blcr/checkpoint_blcr.c
index 8696639ce7..90a8b4d17e 100644
--- a/src/plugins/checkpoint/blcr/checkpoint_blcr.c
+++ b/src/plugins/checkpoint/blcr/checkpoint_blcr.c
@@ -583,6 +583,7 @@ static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
}
hostlist_iterator_destroy(hi);
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
}
--
2.35.3
From f98bec90f959569d32474d56397290044bc9c0fe Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 20:32:30 +0200
Subject: [PATCH 25/30] Add missing slurm_msg_set_r_uid() to
_slurm_rpc_control_status()
The fixed function has been removed from later versions of the code.
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/backup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index 8b9df50184..bf4465652d 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -424,6 +424,8 @@ inline static void _slurm_rpc_control_status(slurm_msg_t * msg)
response_msg.protocol_version = msg->protocol_version;
response_msg.address = msg->address;
response_msg.conn = msg->conn;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_CONTROL_STATUS;
response_msg.data = &data;
response_msg.data_size = sizeof(control_status_msg_t);
--
2.35.3
From 6ceaf75b2d253f33e152a8a0be3b32e7adaf2f75 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 20:35:02 +0200
Subject: [PATCH 26/30] Add slurm_msg_set_r_uid() to
_persist_fed_job_lock_bool() and _agent_thread()
This seems to be missing from upstream as well.
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/fed_mgr.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index 28bda763f5..9441cd3ea5 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -1038,6 +1038,7 @@ static int _persist_fed_job_lock_bool(slurmdb_cluster_rec_t *conn,
req_msg.protocol_version = conn->rpc_version;
req_msg.data = &sib_msg;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if (_send_recv_msg(conn, &req_msg, &resp_msg, false)) {
rc = SLURM_PROTOCOL_ERROR;
@@ -2278,6 +2279,7 @@ static void *_agent_thread(void *arg)
slurm_msg_t_init(&req_msg);
req_msg.msg_type = REQUEST_CTLD_MULT_MSG;
req_msg.data = &ctld_req_msg;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = _send_recv_msg(cluster, &req_msg, &resp_msg,
false);
--
2.35.3
From 176992e38c1a9c23196831e78e78556b3cf6feb6 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 20:52:16 +0200
Subject: [PATCH 27/30] Fix slurm_msg_set_r_uid() settings for checkpointing
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/checkpoint.c | 1 +
src/slurmctld/job_mgr.c | 2 ++
src/slurmctld/step_mgr.c | 3 +++
3 files changed, 6 insertions(+)
diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c
index 8e805043da..3c96e55282 100644
--- a/src/common/checkpoint.c
+++ b/src/common/checkpoint.c
@@ -359,6 +359,7 @@ extern int checkpoint_tasks (uint32_t job_id, uint32_t step_id,
ckpt_req.image_dir = image_dir;
req_msg.msg_type = REQUEST_CHECKPOINT_TASKS;
req_msg.data = &ckpt_req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if ((ret_list = slurm_send_recv_msgs(nodelist, &req_msg, (wait*1000),
false))) {
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 6070b55f9f..a501ee6e85 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -17116,6 +17116,7 @@ extern int job_checkpoint(checkpoint_msg_t *ckpt_ptr, uid_t uid,
if (conn_fd < 0) /* periodic checkpoint */
return rc;
+ slurm_msg_set_r_uid(&resp_msg, uid);
if ((rc == SLURM_SUCCESS) &&
((ckpt_ptr->op == CHECK_ABLE) || (ckpt_ptr->op == CHECK_ERROR))) {
resp_msg.msg_type = RESPONSE_CHECKPOINT;
@@ -17536,6 +17537,7 @@ extern int job_restart(checkpoint_msg_t *ckpt_ptr, uid_t uid, int conn_fd,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 5ba6386daa..6bd4a1608c 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -3427,6 +3427,7 @@ extern int job_step_checkpoint(checkpoint_msg_t *ckpt_ptr,
}
reply:
+ slurm_msg_set_r_uid(&resp_msg, uid);
if ((rc == SLURM_SUCCESS) &&
((ckpt_ptr->op == CHECK_ABLE) || (ckpt_ptr->op == CHECK_ERROR))) {
resp_msg.msg_type = RESPONSE_CHECKPOINT;
@@ -3496,6 +3497,7 @@ extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
}
@@ -3555,6 +3557,7 @@ extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
}
--
2.35.3
From 3b738601fe970af4b4439466c4e2852878817585 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Thu, 19 May 2022 09:48:08 +0200
Subject: [PATCH 28/30] Add missing calls to slurm_msg_set_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 1e17d301ab..cfdd29ce15 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4547,7 +4547,14 @@ int slurm_send_rc_err_msg(slurm_msg_t *msg, int rc, char *err_msg)
resp_msg.forward_struct = msg->forward_struct;
resp_msg.ret_list = msg->ret_list;
resp_msg.orig_addr = msg->orig_addr;
-
+ /* like _resp_msg_setup() */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_UID_ANY);
/* send message */
return slurm_send_node_msg(msg->conn_fd, &resp_msg);
}
@@ -4584,6 +4591,14 @@ int slurm_send_reroute_msg(slurm_msg_t *msg, slurmdb_cluster_rec_t *cluster_rec)
resp_msg.ret_list = msg->ret_list;
resp_msg.orig_addr = msg->orig_addr;
+ /* like _resp_msg_setup() */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_UID_ANY);
/* send message */
return slurm_send_node_msg(msg->conn_fd, &resp_msg);
}
--
2.35.3
From 651728a59d56c82ac27824eaefce2014d46f0101 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Thu, 19 May 2022 22:13:56 +0200
Subject: [PATCH 29/30] Add slurm_msg_set_r_uid() for replace_batch_job() which
has been deleted since
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_scheduler.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 987bda8507..e427769d68 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -966,6 +966,7 @@ send_reply:
resp_msg->address = msg->address;
resp_msg->msg_type = REQUEST_BATCH_JOB_LAUNCH;
resp_msg->data = launch_msg;
+ slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
list_append(msg->ret_list, resp_msg);
} else {
slurm_msg_t response_msg;
@@ -975,6 +976,7 @@ send_reply:
response_msg.address = msg->address;
response_msg.msg_type = REQUEST_BATCH_JOB_LAUNCH;
response_msg.data = launch_msg;
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
slurm_free_job_launch_msg(launch_msg);
}
--
2.35.3
From 7b3568ff6dea329368e797fb1f16012e3ac5e59e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 10 Aug 2022 16:37:38 +0200
Subject: [PATCH 30/30] Make sure r_uid is set to SLURM_AUTH_UID_ANY for database
access
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
index 52ebf46964..f06561a31c 100644
--- a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
+++ b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c
@@ -512,6 +512,7 @@ static void _open_slurmdbd_conn(bool need_db)
slurm_set_accounting_storage_port(
slurmdbd_conn->rem_port);
}
+ slurmdbd_conn->r_uid = SLURM_AUTH_UID_ANY;
}
slurmdbd_shutdown = 0;
slurmdbd_conn->shutdown = &slurmdbd_shutdown;
--
2.35.3