File Prevent_credential_abuse.patch of Package slurm.26118
From 85e8b61bf8c91358560db4dc10b016ba4cb5ca8a Mon Sep 17 00:00:00 2001
From: Dominik Bartkiewicz <bart@schedmd.com>
Date: Wed, 4 May 2022 13:06:36 -0600
Subject: [PATCH 01/31] Prevent credential abuse.
CVE-2022-29500
---
NEWS | 1 +
src/api/config_info.c | 1 +
src/api/job_info.c | 2 +
src/api/job_step_info.c | 2 +
src/api/node_info.c | 1 +
src/api/pmi_server.c | 1 +
src/api/reconfigure.c | 1 +
src/api/signal.c | 3 +
src/api/slurm_pmi.c | 3 +
src/api/step_launch.c | 2 +
src/bcast/file_bcast.c | 1 +
src/common/forward.c | 5 +-
src/common/slurm_auth.c | 34 ++-
src/common/slurm_auth.h | 16 +-
src/common/slurm_persist_conn.c | 1 +
src/common/slurm_persist_conn.h | 1 +
src/common/slurm_protocol_api.c | 224 +++++++++++++++++-
src/common/slurm_protocol_api.h | 3 +
src/common/slurm_protocol_defs.c | 4 +
src/common/slurm_protocol_defs.h | 14 ++
src/common/slurmdb_defs.c | 1 +
src/common/slurmdbd_defs.c | 1 +
src/common/stepd_api.c | 3 +-
src/common/stepd_api.h | 3 +-
.../accounting_storage/common/common_as.c | 1 +
src/plugins/auth/munge/auth_munge.c | 64 +++--
src/plugins/auth/none/auth_none.c | 17 +-
src/plugins/mpi/pmi2/setup.c | 2 +
src/plugins/mpi/pmi2/setup.h | 1 +
src/plugins/mpi/pmi2/spawn.c | 3 +-
src/plugins/mpi/pmix/pmixp_dconn.c | 1 +
src/plugins/mpi/pmix/pmixp_dconn.h | 3 +
src/plugins/mpi/pmix/pmixp_server.c | 27 ++-
src/plugins/mpi/pmix/pmixp_utils.c | 1 +
src/sattach/sattach.c | 1 +
src/slurmctld/agent.c | 19 ++
src/slurmctld/agent.h | 5 +
src/slurmctld/backup.c | 9 +-
src/slurmctld/controller.c | 1 +
src/slurmctld/fed_mgr.c | 2 +
src/slurmctld/job_mgr.c | 13 +-
src/slurmctld/job_scheduler.c | 2 +
src/slurmctld/node_mgr.c | 2 +-
src/slurmctld/node_scheduler.c | 3 +
src/slurmctld/ping_nodes.c | 4 +
src/slurmctld/srun_comm.c | 37 ++-
src/slurmctld/step_mgr.c | 3 +
src/slurmd/slurmd/req.c | 21 +-
src/slurmd/slurmd/slurmd.c | 6 +-
src/slurmd/slurmstepd/io.c | 1 +
src/slurmd/slurmstepd/mgr.c | 16 +-
src/slurmd/slurmstepd/mgr.h | 2 +-
src/slurmd/slurmstepd/req.c | 1 +
src/slurmd/slurmstepd/slurmstepd.c | 23 +-
src/slurmd/slurmstepd/slurmstepd_job.c | 7 +-
src/slurmd/slurmstepd/slurmstepd_job.h | 4 +-
src/slurmd/slurmstepd/x11_forwarding.c | 4 +
src/slurmdbd/read_config.c | 1 +
src/slurmdbd/slurmdbd.c | 1 +
59 files changed, 554 insertions(+), 82 deletions(-)
diff --git a/NEWS b/NEWS
index 3b7b68a3c6..d6625715ce 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
This file describes changes in recent versions of Slurm. It primarily
documents those changes that are of interest to users and administrators.
+ -- CVE-2022-29500 - Prevent credential abuse.
-- CVE-2022-29501 - Prevent abuse of REQUEST_FORWARD_DATA.
* Changes in Slurm 17.11.13-2
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 786fdfff98..9dc735af35 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -1817,6 +1817,7 @@ slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr)
}
req_msg.msg_type = REQUEST_DAEMON_STATUS;
req_msg.data = NULL;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
diff --git a/src/api/job_info.c b/src/api/job_info.c
index f5b5219076..e2845f3913 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -1512,6 +1512,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid)
req.job_pid = job_pid;
req_msg.msg_type = REQUEST_JOB_ID;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
@@ -1886,6 +1887,7 @@ slurm_network_callerid (network_callerid_msg_t req, uint32_t *job_id,
req_msg.msg_type = REQUEST_NETWORK_CALLERID;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0)
return SLURM_ERROR;
diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c
index fb4a6cbf32..6529951a95 100644
--- a/src/api/job_step_info.c
+++ b/src/api/job_step_info.c
@@ -613,6 +613,7 @@ extern int slurm_job_step_stat(uint32_t job_id, uint32_t step_id,
job_id, step_id, node_list);
slurm_msg_t_init(&req_msg);
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
memset(&req, 0, sizeof(job_step_id_msg_t));
resp_out->job_id = req.job_id = job_id;
@@ -729,6 +730,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
job_id, step_id, node_list);
slurm_msg_t_init(&req_msg);
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
memset(&req, 0, sizeof(job_step_id_msg_t));
resp_out->job_id = req.job_id = job_id;
diff --git a/src/api/node_info.c b/src/api/node_info.c
index 2605464b21..0b0a9de697 100644
--- a/src/api/node_info.c
+++ b/src/api/node_info.c
@@ -877,6 +877,7 @@ extern int slurm_get_node_energy(char *host, uint16_t delta,
req.delta = delta;
req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY;
req_msg.data = &req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c
index 4dd58c23f3..4f8316d932 100644
--- a/src/api/pmi_server.c
+++ b/src/api/pmi_server.c
@@ -141,6 +141,7 @@ static void *_msg_thread(void *x)
slurm_msg_t msg_send;
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
debug2("KVS_Barrier msg to %s:%hu",
msg_arg_ptr->bar_ptr->hostname,
diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c
index 190b7b9d73..a53172e743 100644
--- a/src/api/reconfigure.c
+++ b/src/api/reconfigure.c
@@ -160,6 +160,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req)
if ((fd = slurm_open_controller_conn_spec(dest,working_cluster_rec)) <0)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
+ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
if (slurm_send_node_msg(fd, req) < 0) {
slurm_shutdown_msg_conn(fd);
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
diff --git a/src/api/signal.c b/src/api/signal.c
index 3c2917c225..d829db36a5 100644
--- a/src/api/signal.c
+++ b/src/api/signal.c
@@ -59,6 +59,7 @@ static int _local_send_recv_rc_msgs(const char *nodelist,
slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
slurm_msg_t_init(msg);
+ slurm_msg_set_r_uid(msg, SLURM_AUTH_UID_ANY);
msg->msg_type = type;
msg->data = data;
@@ -100,6 +101,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t
rpc.flags = KILL_JOB_BATCH;
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
msg.msg_type = REQUEST_SIGNAL_TASKS;
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
@@ -154,6 +156,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t
slurm_msg_t_init(&msg);
msg.msg_type = REQUEST_TERMINATE_TASKS;
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
diff --git a/src/api/slurm_pmi.c b/src/api/slurm_pmi.c
index a8ad173db7..ed7cf93460 100644
--- a/src/api/slurm_pmi.c
+++ b/src/api/slurm_pmi.c
@@ -178,6 +178,7 @@ int slurm_send_kvs_comm_set(kvs_comm_set_t *kvs_set_ptr,
_set_pmi_time();
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
msg_send.address = srun_addr;
msg_send.msg_type = PMI_KVS_PUT_REQ;
msg_send.data = (void *) kvs_set_ptr;
@@ -259,6 +260,7 @@ int slurm_get_kvs_comm_set(kvs_comm_set_t **kvs_set_ptr,
data.port = port;
data.hostname = hostname;
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
slurm_msg_t_init(&msg_rcv);
msg_send.address = srun_addr;
msg_send.msg_type = PMI_KVS_GET_REQ;
@@ -343,6 +345,7 @@ static int _forward_comm_set(kvs_comm_set_t *kvs_set_ptr)
if (kvs_set_ptr->kvs_host_ptr[i].port == 0)
continue; /* empty */
slurm_msg_t_init(&msg_send);
+ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY);
msg_send.msg_type = PMI_KVS_GET_RESP;
msg_send.data = (void *) kvs_set_ptr;
slurm_set_addr(&msg_send.address,
diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index abebc0d69b..2a8cf4c6bc 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -910,6 +910,7 @@ extern void slurm_step_launch_fwd_signal(slurm_step_ctx_t *ctx, int signo)
hostlist_destroy(hl);
RESEND: slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
req.msg_type = REQUEST_SIGNAL_TASKS;
req.data = &msg;
@@ -1742,6 +1743,7 @@ static int _launch_tasks(slurm_step_ctx_t *ctx,
}
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_LAUNCH_TASKS;
msg.data = launch_msg;
diff --git a/src/bcast/file_bcast.c b/src/bcast/file_bcast.c
index e4254832c7..d517e9c9ba 100644
--- a/src/bcast/file_bcast.c
+++ b/src/bcast/file_bcast.c
@@ -190,6 +190,7 @@ static int _file_bcast(struct bcast_parameters *params,
slurm_msg_t msg;
slurm_msg_t_init(&msg);
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.data = bcast_msg;
msg.msg_type = REQUEST_FILE_BCAST;
diff --git a/src/common/forward.c b/src/common/forward.c
index 1fbeeb0d11..2b2796343c 100644
--- a/src/common/forward.c
+++ b/src/common/forward.c
@@ -248,7 +248,7 @@ void *_forward_thread(void *arg)
/* steps, fwd_msg->timeout); */
}
- ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
+ ret_list = slurm_receive_resp_msgs(fd, steps, fwd_msg->timeout);
/* info("sent %d forwards got %d back", */
/* fwd_msg->header.forward.cnt, list_count(ret_list)); */
@@ -356,6 +356,9 @@ void *_fwd_tree_thread(void *arg)
send_msg.msg_type = fwd_tree->orig_msg->msg_type;
send_msg.data = fwd_tree->orig_msg->data;
send_msg.protocol_version = fwd_tree->orig_msg->protocol_version;
+ if (fwd_tree->orig_msg->restrict_uid_set)
+ slurm_msg_set_r_uid(&send_msg,
+ fwd_tree->orig_msg->restrict_uid);
/* repeat until we are sure the message was sent */
while ((name = hostlist_shift(fwd_tree->tree_hl))) {
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index f7811e71aa..7ce0887ead 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -60,7 +60,8 @@ static bool init_run = false;
* end of the structure.
*/
typedef struct slurm_auth_ops {
- void * (*create) ( char *auth_info );
+ void * (*create) ( char *auth_infouid_t, uid_t r_uid,
+ void *data, int dlen);
int (*destroy) ( void *cred );
int (*verify) ( void *cred, char *auth_info );
uid_t (*get_uid) ( void *cred, char *auth_info );
@@ -70,6 +71,9 @@ typedef struct slurm_auth_ops {
int (*print) ( void *cred, FILE *fp );
int (*sa_errno) ( void *cred );
const char * (*sa_errstr) ( int slurm_errno );
+ bool (*hash_enable);
+ int (*get_data) (void *cred, char **data,
+ uint32_t *len);
} slurm_auth_ops_t;
/*
* These strings must be kept in the same order as the fields
@@ -85,7 +89,9 @@ static const char *syms[] = {
"slurm_auth_unpack",
"slurm_auth_print",
"slurm_auth_errno",
- "slurm_auth_errstr"
+ "slurm_auth_errstr",
+ "hash_enable",
+ "auth_p_get_data"
};
/*
@@ -125,6 +131,15 @@ slurm_auth_generic_errstr( int slurm_errno )
}
}
+extern bool slurm_get_plugin_hash_enable(int index)
+{ /* Report the hash_enable flag of the auth plugin stored at ops[index]. */
+ if (slurm_auth_init(NULL) < 0)
+ return true; /* auth layer could not be initialised: answer true */
+ /* hash_enable holds a pointer to a bool, hence the dereference below. */
+ return *(ops[index].hash_enable);
+ /* NOTE(review): other dispatchers in this file use ops.<op>, not ops[i] - confirm ops is an array in this tree. */
+}
+
extern int slurm_auth_init( char *auth_type )
{
int retval = SLURM_SUCCESS;
@@ -182,12 +197,13 @@ slurm_auth_fini( void )
* the API function dispatcher.
*/
-void *g_slurm_auth_create(char *auth_info)
+void *g_slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen) /* r_uid, data and dlen are handed unchanged to the plugin's create op */
{
if (slurm_auth_init(NULL) < 0)
return NULL;
- return (*(ops.create))(auth_info);
+ return (*(ops.create))(auth_info, r_uid, data, dlen); /* result of the plugin's create op is returned as-is */
}
int g_slurm_auth_destroy(void *cred)
@@ -206,6 +222,16 @@ int g_slurm_auth_verify(void *cred, char *auth_info)
return (*(ops.verify))(cred, auth_info);
}
+int auth_g_get_data(void *cred, char **data, uint32_t *len)
+{ /* Fetch a credential's payload through the get_data op of the plugin selected by wrap->index. */
+ cred_wrapper_t *wrap = (cred_wrapper_t *) cred; /* only wrap->index is read here */
+ /* NOTE(review): cred_wrapper_t is not declared in the visible part of this patch - confirm it exists in this tree. */
+ if (!wrap || slurm_auth_init(NULL) < 0)
+ return SLURM_ERROR; /* NULL credential or auth layer not initialised */
+ /* data and len are passed straight through; the plugin's return code is returned. */
+ return (*(ops[wrap->index].get_data))(cred, data, len);
+}
+
uid_t g_slurm_auth_get_uid(void *cred, char *auth_info)
{
if (slurm_auth_init(NULL) < 0)
diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h
index b430809ca5..f7ab7ac68e 100644
--- a/src/common/slurm_auth.h
+++ b/src/common/slurm_auth.h
@@ -88,6 +88,12 @@ enum {
ARG_COUNT,
};
+/*
+ * Should stay equal to MUNGE_UID_ANY. Used as the restricted uid of a
+ * message when decoding of its credential is not limited to one uid.
+ */
+#define SLURM_AUTH_UID_ANY -1
+
/*
* SLURM authentication context opaque type.
*/
@@ -128,15 +134,23 @@ extern int slurm_auth_init( char *auth_type );
*/
extern int slurm_auth_fini( void );
+/*
+ * Check if plugin type corresponding to the authentication
+ * plugin index supports hash.
+ */
+extern bool slurm_get_plugin_hash_enable(int index);
+
/*
* Static bindings for the global authentication context.
*/
-extern void * g_slurm_auth_create(char *auth_info);
+extern void * g_slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen);
extern int g_slurm_auth_destroy( void *cred );
extern int g_slurm_auth_verify(void *cred, char *auth_info);
extern uid_t g_slurm_auth_get_uid( void *cred, char *auth_info );
extern gid_t g_slurm_auth_get_gid( void *cred, char *auth_info );
extern int g_slurm_auth_pack( void *cred, Buf buf );
+extern int auth_g_get_data(void *cred, char **data, uint32_t *len);
/*
* WARNING! The returned auth pointer WILL have pointers
diff --git a/src/common/slurm_persist_conn.c b/src/common/slurm_persist_conn.c
index a15624b219..75e01e8c84 100644
--- a/src/common/slurm_persist_conn.c
+++ b/src/common/slurm_persist_conn.c
@@ -575,6 +575,7 @@ extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
if (persist_conn->flags & PERSIST_FLAG_DBD)
req_msg.flags |= SLURMDBD_CONNECTION;
+ slurm_msg_set_r_uid(&req_msg, persist_conn->r_uid);
memset(&req, 0, sizeof(persist_init_req_msg_t));
req.cluster_name = persist_conn->cluster_name;
diff --git a/src/common/slurm_persist_conn.h b/src/common/slurm_persist_conn.h
index ba6df4c904..30a01ef398 100644
--- a/src/common/slurm_persist_conn.h
+++ b/src/common/slurm_persist_conn.h
@@ -72,6 +72,7 @@ typedef struct {
uint16_t flags;
bool inited;
persist_conn_type_t persist_type;
+ uid_t r_uid;
char *rem_host;
uint16_t rem_port;
time_t *shutdown;
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 707eedb8e7..01ea2648ba 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -884,6 +884,36 @@ char *slurm_get_priority_weight_tres(void)
return weights;
}
+static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
+ void *cred) /* buffer and header are not referenced in the body */
+{ /* Check the payload carried in cred against msg->msg_type; returns auth_g_get_data()'s code, or SLURM_ERROR on a rejected payload. */
+ char *cred_hash = NULL;
+ uint32_t cred_hash_len = 0;
+ int rc;
+ static time_t config_update = (time_t) -1; /* slurm_conf.last_update seen on the previous refresh */
+ static bool block_null_hash = true; /* cached result of the comm_params lookup below */
+ /* Re-evaluate the "block_null_hash" option whenever slurm_conf.last_update changes. */
+ if (config_update != slurm_conf.last_update) {
+ block_null_hash = (xstrcasestr(slurm_conf.comm_params,
+ "block_null_hash"));
+ config_update = slurm_conf.last_update;
+ }
+ /* rc keeps this return code unless one of the checks below overrides it. */
+ rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len);
+ /* Accepted payload: exactly 3 bytes, first byte 1, then bytes equal to msg->msg_type. */
+ if (cred_hash || cred_hash_len) {
+ if (cred_hash_len != 3 || cred_hash[0] != 1 ||
+ memcmp(cred_hash + 1,
+ &msg->msg_type, sizeof(msg->msg_type)))
+ rc = SLURM_ERROR;
+ } else if (block_null_hash && /* no payload at all: rejected only if the option is set ... */
+ slurm_get_plugin_hash_enable(msg->auth_index)) /* ... and the plugin reports hash_enable */
+ rc = SLURM_ERROR;
+ /* cred_hash is released with xfree() whether or not the check passed. */
+ xfree(cred_hash);
+ return rc;
+}
+
static int _get_tres_id(char *type, char *name)
{
slurmdb_tres_rec_t tres_rec;
@@ -3209,6 +3239,9 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
goto total_return;
}
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg->auth_uid_set = true;
+
/*
* Unpack message body
*/
@@ -3219,6 +3252,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
msg->body_offset = get_buf_offset(buffer);
if ((header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, msg, auth_cred) ||
(unpack_msg(msg, buffer) != SLURM_SUCCESS)) {
rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
(void) g_slurm_auth_destroy(auth_cred);
@@ -3319,6 +3353,8 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout)
*/
if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
rc = errno;
+ if (!rc)
+ rc = SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
goto endit;
}
@@ -3481,6 +3517,8 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
goto total_return;
}
+ msg.auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg.auth_uid_set = true;
/*
* Unpack message body
*/
@@ -3489,6 +3527,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
msg.flags = header.flags;
if ((header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, &msg, auth_cred) ||
(unpack_msg(&msg, buffer) != SLURM_SUCCESS)) {
(void) g_slurm_auth_destroy(auth_cred);
free_buf(buffer);
@@ -3530,6 +3569,155 @@ total_return:
}
+List slurm_receive_resp_msgs(int fd, int steps, int timeout)
+{ /* Receive one message on fd and return it wrapped in a List of ret_data_info_t; errno is set to the result code. */
+ char *buf = NULL;
+ size_t buflen = 0;
+ header_t header;
+ int rc;
+ void *auth_cred = NULL;
+ slurm_msg_t msg;
+ buf_t *buffer;
+ ret_data_info_t *ret_data_info = NULL;
+ List ret_list = NULL;
+ int orig_timeout = timeout; /* per-step figure; only used for the log messages below */
+
+ xassert(fd >= 0);
+
+ slurm_msg_t_init(&msg);
+ msg.conn_fd = fd;
+
+ if (timeout <= 0) {
+ /* convert secs to msec */
+ timeout = slurm_conf.msg_timeout * 1000;
+ orig_timeout = timeout;
+ }
+ if (steps) {
+ if (message_timeout < 0)
+ message_timeout = slurm_conf.msg_timeout * 1000;
+ orig_timeout = (timeout -
+ (message_timeout*(steps-1)))/steps;
+ steps--;
+ }
+
+ log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d",
+ __func__, orig_timeout, steps, timeout);
+ /* we compare to the orig_timeout here because that is really
+ * what we are going to wait for each step
+ */
+ if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) {
+ log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
+ __func__, (slurm_conf.msg_timeout * 10),
+ (timeout/1000));
+ } else if (orig_timeout < 1000) {
+ log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
+ __func__, timeout, orig_timeout);
+ }
+
+
+ /*
+ * Receive a msg. slurm_msg_recvfrom() will read the message
+ * length and allocate space on the heap for a buffer containing
+ * the message.
+ */
+ if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
+ forward_init(&header.forward); /* header was never unpacked; total_return still calls destroy_forward() on it */
+ rc = errno; /* NOTE(review): slurm_receive_msg() in this patch substitutes an error code when errno is 0; no such guard here - confirm */
+ goto total_return;
+ }
+
+ log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__);
+ buffer = create_buf(buf, buflen);
+
+ if (unpack_header(&header, buffer) == SLURM_ERROR) {
+ free_buf(buffer);
+ rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
+ goto total_return;
+ }
+
+ if (check_header_version(&header) < 0) {
+ slurm_addr_t resp_addr;
+ if (!slurm_get_peer_addr(fd, &resp_addr)) {
+ error("%s: Invalid Protocol Version %u from at %pA",
+ __func__, header.version, &resp_addr);
+ } else {
+ error("%s: Invalid Protocol Version %u from problem connection: %m",
+ __func__, header.version);
+ }
+
+ free_buf(buffer);
+ rc = SLURM_PROTOCOL_VERSION_ERROR;
+ goto total_return;
+ }
+ //info("ret_cnt = %d",header.ret_cnt);
+ if (header.ret_cnt > 0) { /* take over the header's list, or start an empty one; the header fields are then cleared */
+ if (header.ret_list)
+ ret_list = header.ret_list;
+ else
+ ret_list = list_create(destroy_data_info);
+ header.ret_cnt = 0;
+ header.ret_list = NULL;
+ }
+
+ /* Forward message to other nodes */
+ if (header.forward.cnt > 0) { /* only logged; this function does not forward */
+ error("%s: We need to forward this to other nodes use slurm_receive_msg_and_forward instead",
+ __func__);
+ }
+
+ if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) {
+ error("%s: auth_g_unpack: %m", __func__);
+ free_buf(buffer);
+ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
+ goto total_return;
+ }
+ g_slurm_auth_destroy(auth_cred); /* credential is only unpacked and discarded: no get_uid or _check_hash call is made on it here */
+ /*
+ * Unpack message body
+ */
+ msg.protocol_version = header.version;
+ msg.msg_type = header.msg_type;
+ msg.flags = header.flags;
+
+ if ((header.body_length > remaining_buf(buffer)) ||
+ (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) {
+ free_buf(buffer);
+ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
+ goto total_return;
+ }
+ free_buf(buffer);
+ rc = SLURM_SUCCESS;
+
+total_return:
+ destroy_forward(&header.forward);
+
+ if (rc != SLURM_SUCCESS) {
+ if (ret_list) { /* with no list from the header, the failure is only logged and NULL is returned */
+ ret_data_info = xmalloc(sizeof(ret_data_info_t));
+ ret_data_info->err = rc;
+ ret_data_info->type = RESPONSE_FORWARD_FAILED;
+ ret_data_info->data = NULL;
+ list_push(ret_list, ret_data_info);
+ }
+
+ error("%s: failed: %s",
+ __func__, slurm_strerror(rc));
+ usleep(10000); /* Discourage brute force attack */
+ } else {
+ if (!ret_list)
+ ret_list = list_create(destroy_data_info);
+ ret_data_info = xmalloc(sizeof(ret_data_info_t));
+ ret_data_info->err = rc;
+ ret_data_info->node_name = NULL;
+ ret_data_info->type = msg.msg_type;
+ ret_data_info->data = msg.data; /* the unpacked body is handed to the list entry */
+ list_push(ret_list, ret_data_info);
+ }
+
+ errno = rc; /* result code is exported through errno */
+ return ret_list;
+
+}
/* try to determine the UID associated with a message with different
* message header version, return -1 if we can't tell */
static int _unpack_msg_uid(Buf buffer)
@@ -3715,6 +3903,9 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
goto total_return;
}
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ msg->auth_uid_set = true;
+
/*
* Unpack message body
*/
@@ -3728,6 +3919,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
}
if ( (header.body_length > remaining_buf(buffer)) ||
+ _check_hash(buffer, &header, msg, auth_cred) ||
(unpack_msg(msg, buffer) != SLURM_SUCCESS) ) {
(void) g_slurm_auth_destroy(auth_cred);
free_buf(buffer);
@@ -3795,6 +3987,7 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
int rc;
void * auth_cred;
time_t start_time = time(NULL);
+ unsigned char auth_payload[3] = { 1 }; /* uint8_t + uint16_t (msg_type) */
if (msg->conn) {
persist_msg_t persist_msg;
@@ -3830,6 +4023,9 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
return rc;
}
+ if (!msg->restrict_uid_set)
+ fatal("%s: restrict_uid is not set", __func__);
+ memcpy(auth_payload + 1, &msg->msg_type, sizeof(msg->msg_type));
/*
* Initialize header with Auth credential and message type.
* We get the credential now rather than later so the work can
@@ -3838,10 +4034,14 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
* wait too long for the incoming message.
*/
if (msg->flags & SLURM_GLOBAL_AUTH_KEY) {
- auth_cred = g_slurm_auth_create(_global_auth_key());
+ auth_cred = g_slurm_auth_create(_global_auth_key(),
+ msg->restrict_uid, auth_payload,
+ sizeof(auth_payload));
} else {
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ msg->restrict_uid, auth_payload,
+ sizeof(auth_payload));
xfree(auth_info);
}
@@ -3858,10 +4058,16 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg)
if (difftime(time(NULL), start_time) >= 60) {
(void) g_slurm_auth_destroy(auth_cred);
if (msg->flags & SLURM_GLOBAL_AUTH_KEY) {
- auth_cred = g_slurm_auth_create(_global_auth_key());
+ auth_cred = g_slurm_auth_create(_global_auth_key(),
+ msg->restrict_uid,
+ auth_payload,
+ sizeof(auth_payload));
} else {
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ msg->restrict_uid,
+ auth_payload,
+ sizeof(auth_payload));
xfree(auth_info);
}
}
@@ -4373,6 +4579,7 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg,
forward_init(&request_msg->forward, NULL);
request_msg->ret_list = NULL;
request_msg->forward_struct = NULL;
+ slurm_msg_set_r_uid(request_msg, SLURM_AUTH_UID_ANY);
tryagain:
retry = 1;
@@ -4498,6 +4705,8 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req,
goto cleanup;
}
+ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+
if ((rc = slurm_send_node_msg(fd, req)) < 0) {
rc = SLURM_ERROR;
} else {
@@ -4866,6 +5075,12 @@ extern void slurm_free_msg(slurm_msg_t *msg)
}
}
+extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid)
+{ /* Record r_uid as the message's restricted uid and flag it as set. */
+ msg->restrict_uid = r_uid;
+ msg->restrict_uid_set = true; /* slurm_send_node_msg() calls fatal() when this flag is false */
+}
+
extern char *nodelist_nth_host(const char *nodelist, int inx)
{
hostlist_t hl = hostlist_create(nodelist);
@@ -5062,6 +5277,7 @@ extern int slurm_forward_data(
req.len = len;
req.data = (char *)data;
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_FORWARD_DATA;
msg.data = &req;
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 194d5b137b..8da140c565 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -1023,6 +1023,7 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout);
* errno set.
*/
List slurm_receive_msgs(int fd, int steps, int timeout);
+List slurm_receive_resp_msgs(int fd, int steps, int timeout);
/*
* Receive a slurm message on the open slurm descriptor "fd" waiting
@@ -1333,6 +1334,8 @@ extern int *set_span(int total, uint16_t tree_width);
extern void slurm_free_msg_members(slurm_msg_t *msg);
extern void slurm_free_msg(slurm_msg_t * msg);
+extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid);
+
/* must free this memory with free not xfree */
extern char *nodelist_nth_host(const char *nodelist, int inx);
extern int nodelist_find(const char *nodelist, const char *name);
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 0e1d86597c..42297b75c3 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -51,6 +51,7 @@
#include "src/common/power.h"
#include "src/common/slurm_accounting_storage.h"
#include "src/common/slurm_acct_gather_energy.h"
+#include "src/common/slurm_auth.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_ext_sensors.h"
#include "src/common/slurm_jobacct_gather.h"
@@ -104,6 +105,7 @@ extern void slurm_msg_t_init(slurm_msg_t *msg)
{
memset(msg, 0, sizeof(slurm_msg_t));
+ msg->auth_uid = SLURM_AUTH_NOBODY;
msg->conn_fd = -1;
msg->msg_type = NO_VAL16;
msg->protocol_version = NO_VAL16;
@@ -133,6 +135,8 @@ extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src)
dest->ret_list = src->ret_list;
dest->forward_struct = src->forward_struct;
dest->orig_addr.sin_addr.s_addr = 0;
+ if (src->auth_uid_set)
+ slurm_msg_set_r_uid(dest, src->auth_uid);
return;
}
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index d4409ff300..c9fc171912 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -50,6 +50,7 @@
#include "src/common/job_options.h"
#include "src/common/list.h"
#include "src/common/macros.h"
+#include "src/common/slurm_auth.h"
#include "src/common/slurm_cred.h"
#include "src/common/slurm_protocol_common.h"
#include "src/common/slurm_persist_conn.h"
@@ -459,6 +460,19 @@ typedef struct slurm_protocol_config {
typedef struct slurm_msg {
slurm_addr_t address;
void *auth_cred;
+ uid_t auth_uid; /* NEVER PACK. Authenticated uid from auth
+ * credential. Only valid if auth_uid_set is
+ * true. Set to SLURM_AUTH_NOBODY if not set
+ * yet.
+ */
+ bool auth_uid_set; /* NEVER PACK. True when auth_uid has been set.
+ * This is a safety measure against handling
+ * a slurm_msg_t that has been xmalloc()'d but
+ * slurm_msg_t_init() was not called since
+ * auth_uid would be root.
+ */
+ uid_t restrict_uid;
+ bool restrict_uid_set;
uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of
buffer starts. */
Buf buffer; /* DON't PACK! ptr to buffer that msg was unpacked from. */
diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c
index bf3cfb32ec..cb6611159f 100644
--- a/src/common/slurmdb_defs.c
+++ b/src/common/slurmdb_defs.c
@@ -2882,6 +2882,7 @@ extern int slurmdb_send_accounting_update(List update_list, char *cluster,
slurm_set_addr_char(&req.address, port, host);
req.protocol_version = rpc_version;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
req.msg_type = ACCOUNTING_UPDATE_MSG;
if (slurmdbd_conf)
diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c
index 9b288e17e3..3ea61902a6 100644
--- a/src/common/slurmdbd_defs.c
+++ b/src/common/slurmdbd_defs.c
@@ -457,6 +457,7 @@ static void _open_slurmdbd_conn(bool need_db)
slurm_set_accounting_storage_port(
slurmdbd_conn->rem_port);
}
+ slurmdbd_conn->r_uid = SLURM_AUTH_UID_ANY;
}
slurmdbd_shutdown = 0;
slurmdbd_conn->shutdown = &slurmdbd_shutdown;
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 5f15a45cf1..05c0a810b1 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -485,7 +485,7 @@ rwfail:
int
stepd_attach(int fd, uint16_t protocol_version,
slurm_addr_t *ioaddr, slurm_addr_t *respaddr,
- void *job_cred_sig, reattach_tasks_response_msg_t *resp)
+ void *job_cred_sig, uid_t uid, reattach_tasks_response_msg_t *resp)
{
int req = REQUEST_ATTACH;
int rc = SLURM_SUCCESS;
@@ -495,6 +495,7 @@ stepd_attach(int fd, uint16_t protocol_version,
safe_write(fd, ioaddr, sizeof(slurm_addr_t));
safe_write(fd, respaddr, sizeof(slurm_addr_t));
safe_write(fd, job_cred_sig, SLURM_IO_KEY_SIZE);
+ safe_write(fd, &uid, sizeof(uid_t));
safe_write(fd, &proto, sizeof(int));
/* Receive the return code */
diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h
index 205cfd3c90..7482979597 100644
--- a/src/common/stepd_api.h
+++ b/src/common/stepd_api.h
@@ -189,7 +189,8 @@ int stepd_signal_container(int fd, uint16_t protocol_version, int signal,
*/
int stepd_attach(int fd, uint16_t protocol_version,
slurm_addr_t *ioaddr, slurm_addr_t *respaddr,
- void *job_cred_sig, reattach_tasks_response_msg_t *resp);
+ void *job_cred_sig, uid_t uid,
+ reattach_tasks_response_msg_t *resp);
/*
* Scan for available running slurm step daemons by checking
diff --git a/src/plugins/accounting_storage/common/common_as.c b/src/plugins/accounting_storage/common/common_as.c
index fc811188e4..d0a62dac01 100644
--- a/src/plugins/accounting_storage/common/common_as.c
+++ b/src/plugins/accounting_storage/common/common_as.c
@@ -385,6 +385,7 @@ extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version)
out_msg.msg_type = ACCOUNTING_FIRST_REG;
out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
out_msg.data = &update;
+ slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY);
slurm_send_node_msg(fd, &out_msg);
/* We probably need to add matching recv_msg function
* for an arbitray fd or should these be fire
diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c
index 3f98a3343a..fbcd1bbd52 100644
--- a/src/plugins/auth/munge/auth_munge.c
+++ b/src/plugins/auth/munge/auth_munge.c
@@ -84,6 +84,7 @@
const char plugin_name[] = "Munge authentication plugin";
const char plugin_type[] = "auth/munge";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
+bool hash_enable = true;
static int plugin_errno = SLURM_SUCCESS;
static int bad_cred_test = -1;
@@ -102,12 +103,12 @@ typedef struct _slurm_auth_credential {
int magic; /* magical munge validity magic */
#endif
char *m_str; /* munged string */
- void *buf; /* Application specific data */
bool verified; /* true if this cred has been verified */
- int len; /* amount of App data */
uid_t uid; /* UID. valid only if verified == true */
gid_t gid; /* GID. valid only if verified == true */
int cr_errno;
+ void *data; /* payload data */
+ int dlen; /* payload data length */
} slurm_auth_credential_t;
/*
@@ -154,7 +155,8 @@ int init ( void )
* allocate a credential. Whether the credential is populated with useful
* data at this time is implementation-dependent.
*/
-slurm_auth_credential_t *slurm_auth_create(char *opts)
+slurm_auth_credential_t *slurm_auth_create(char *opts, uid_t r_uid,
+ void *data, int dlen)
{
int rc, retry = RETRY_COUNT, auth_ttl;
slurm_auth_credential_t *cred = NULL;
@@ -191,6 +193,13 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
}
}
+ rc = munge_ctx_set(ctx, MUNGE_OPT_UID_RESTRICTION, r_uid);
+ if (rc != EMUNGE_SUCCESS) {
+ error("munge_ctx_set failure");
+ munge_ctx_destroy(ctx);
+ return NULL;
+ }
+
auth_ttl = slurm_get_auth_ttl();
if (auth_ttl)
(void) munge_ctx_set(ctx, MUNGE_OPT_TTL, auth_ttl);
@@ -198,8 +207,8 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
cred = xmalloc(sizeof(*cred));
cred->verified = false;
cred->m_str = NULL;
- cred->buf = NULL;
- cred->len = 0;
+ cred->data = NULL;
+ cred->dlen = 0;
cred->cr_errno = SLURM_SUCCESS;
xassert((cred->magic = MUNGE_MAGIC));
@@ -213,7 +222,7 @@ slurm_auth_credential_t *slurm_auth_create(char *opts)
ohandler = xsignal(SIGALRM, (SigFunc *)SIG_BLOCK);
again:
- err = munge_encode(&cred->m_str, ctx, cred->buf, cred->len);
+ err = munge_encode(&cred->m_str, ctx, data, dlen);
if (err != EMUNGE_SUCCESS) {
if ((err == EMUNGE_SOCKET) && retry--) {
debug("Munge encode failed: %s (retrying ...)",
@@ -258,8 +267,8 @@ slurm_auth_destroy( slurm_auth_credential_t *cred )
*/
if (cred->m_str)
free(cred->m_str);
- if (cred->buf)
- free(cred->buf);
+ if (cred->data)
+ free(cred->data);
xfree(cred);
return SLURM_SUCCESS;
@@ -351,6 +360,34 @@ slurm_auth_get_gid( slurm_auth_credential_t *cred, char *opts )
return cred->gid;
}
+/* Copy the payload of a verified credential into a fresh xmalloc() buffer
+ * (*data, *len); both are zeroed when there is no payload. Fails with
+ * ESLURM_AUTH_BADARG if cred is NULL; auth_p_verify() must be called first. */
+int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len)
+{
+ if (!cred || !cred->verified) {
+ /*
+ * This xassert will trigger on a development build if
+ * the calling path did not verify the credential first.
+ */
+ xassert(!cred);
+ slurm_seterrno(ESLURM_AUTH_BADARG);
+ return SLURM_ERROR;
+ }
+
+ xassert(cred->magic == MUNGE_MAGIC);
+
+ if (cred->data && cred->dlen) {
+ *data = xmalloc(cred->dlen);
+ memcpy(*data, cred->data, cred->dlen); /* hand back a copy, not cred->data */
+ *len = cred->dlen;
+ } else {
+ *data = NULL;
+ *len = 0;
+ }
+ return SLURM_SUCCESS;
+}
+
/*
* Marshall a credential for transmission over the network, according to
* SLURM's marshalling protocol.
@@ -418,8 +455,6 @@ slurm_auth_unpack( Buf buf )
cred = xmalloc(sizeof(*cred));
cred->verified = false;
cred->m_str = NULL;
- cred->buf = NULL;
- cred->len = 0;
cred->cr_errno = SLURM_SUCCESS;
xassert((cred->magic = MUNGE_MAGIC));
@@ -521,14 +556,9 @@ _decode_cred(slurm_auth_credential_t *c, char *socket)
return SLURM_ERROR;
}
- again:
- c->buf = NULL;
- err = munge_decode(c->m_str, ctx, &c->buf, &c->len, &c->uid, &c->gid);
+again:
+ err = munge_decode(c->m_str, ctx, &c->data, &c->dlen, &c->uid, &c->gid);
if (err != EMUNGE_SUCCESS) {
- if (c->buf) {
- free(c->buf);
- c->buf = NULL;
- }
if ((err == EMUNGE_SOCKET) && retry--) {
debug("Munge decode failed: %s (retrying ...)",
munge_ctx_strerror(ctx));
diff --git a/src/plugins/auth/none/auth_none.c b/src/plugins/auth/none/auth_none.c
index 6bd3beee88..34c7d79b1c 100644
--- a/src/plugins/auth/none/auth_none.c
+++ b/src/plugins/auth/none/auth_none.c
@@ -74,6 +74,7 @@
const char plugin_name[] = "Null authentication plugin";
const char plugin_type[] = "auth/none";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
+bool hash_enable = false;
/*
* An opaque type representing authentication credentials. This type can be
@@ -148,7 +149,8 @@ extern int fini ( void )
* Allocate and initializes a credential. This function should return
* NULL if it cannot allocate a credential.
*/
-slurm_auth_credential_t *slurm_auth_create(char *auth_info)
+slurm_auth_credential_t *slurm_auth_create(char *auth_info, uid_t r_uid,
+ void *data, int dlen)
{
slurm_auth_credential_t *cred;
cred = xmalloc(sizeof(slurm_auth_credential_t));
@@ -214,6 +216,19 @@ slurm_auth_get_gid( slurm_auth_credential_t *cred, char *auth_info )
}
}
+int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len)
+{
+ if (!cred) { /* a credential is still required, even though none is read */
+ slurm_seterrno(ESLURM_AUTH_BADARG);
+ return SLURM_ERROR;
+ }
+
+ *data = NULL; /* this plugin always reports an empty payload */
+ *len = 0;
+
+ return SLURM_SUCCESS;
+}
+
/*
* Marshall a credential for transmission over the network, according to
* SLURM's marshalling protocol.
diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c
index e54f215b97..b57a47dd80 100644
--- a/src/plugins/mpi/pmi2/setup.c
+++ b/src/plugins/mpi/pmi2/setup.c
@@ -106,6 +106,8 @@ _setup_stepd_job_info(const stepd_step_rec_t *job, char ***env)
memset(&job_info, 0, sizeof(job_info));
+ job_info.uid = job->uid;
+
if (job->pack_jobid && (job->pack_jobid != NO_VAL)) {
job_info.jobid = job->pack_jobid;
job_info.stepid = job->stepid;
diff --git a/src/plugins/mpi/pmi2/setup.h b/src/plugins/mpi/pmi2/setup.h
index 2ce15af978..59c55399a8 100644
--- a/src/plugins/mpi/pmi2/setup.h
+++ b/src/plugins/mpi/pmi2/setup.h
@@ -59,6 +59,7 @@
typedef struct pmi2_job_info {
uint32_t jobid; /* Current SLURM job id */
uint32_t stepid; /* Current step id (or NO_VAL) */
+ uid_t uid; /* user id for job */
uint32_t nnodes; /* number of nodes in current job step */
uint32_t nodeid; /* relative position of this node in job */
uint32_t ntasks; /* total number of tasks in current job */
diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c
index 9115060272..df48901dc3 100644
--- a/src/plugins/mpi/pmi2/spawn.c
+++ b/src/plugins/mpi/pmi2/spawn.c
@@ -151,7 +151,8 @@ spawn_req_pack(spawn_req_t *req, Buf buf)
void *auth_cred;
char *auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ job_info.uid, NULL, 0);
xfree(auth_info);
if (auth_cred == NULL) {
error("authentication: %s",
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c
index 39b2082aeb..eb8888a32c 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.c
+++ b/src/plugins/mpi/pmix/pmixp_dconn.c
@@ -79,6 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
_pmixp_dconn_conns[i].nodeid = i;
_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
+ _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id;
}
return SLURM_SUCCESS;
}
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.h b/src/plugins/mpi/pmix/pmixp_dconn.h
index 8dbc37bc01..8a48dbd9e8 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.h
+++ b/src/plugins/mpi/pmix/pmixp_dconn.h
@@ -82,6 +82,9 @@ typedef struct {
/* remote node info */
int nodeid;
void *priv;
+
+ /* authenticated uid on remote */
+ uid_t uid;
} pmixp_dconn_t;
typedef void *(*pmixp_dconn_p2p_init_t)(int nodeid,
diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c
index 2d4da9e2fc..294fe5eeac 100644
--- a/src/plugins/mpi/pmix/pmixp_server.c
+++ b/src/plugins/mpi/pmix/pmixp_server.c
@@ -494,13 +494,14 @@ void pmixp_server_cleanup(void)
* --------------------- Authentication functionality -------------------
*/
-static int _auth_cred_create(Buf buf)
+static int _auth_cred_create(Buf buf, uid_t uid)
{
void *auth_cred = NULL;
char *auth_info = slurm_get_auth_info();
int rc = SLURM_SUCCESS;
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ uid, NULL, 0);
xfree(auth_info);
if (!auth_cred) {
rc = g_slurm_auth_errno(NULL);
@@ -519,7 +520,7 @@ static int _auth_cred_create(Buf buf)
return rc;
}
-static int _auth_cred_verify(Buf buf)
+static int _auth_cred_verify(Buf buf, uid_t *uid)
{
void *auth_cred = NULL;
char *auth_info = NULL;
@@ -536,9 +537,19 @@ static int _auth_cred_verify(Buf buf)
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
- if (rc)
+ if (rc) {
PMIXP_ERROR("Verifying authentication credential: %s",
g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
+ } else {
+ uid_t auth_uid;
+ auth_uid = g_slurm_auth_get_uid(auth_cred);
+ if ((auth_uid != slurm_conf.slurmd_user_id) &&
+ (auth_uid != _pmixp_job_info.uid)) {
+ PMIXP_ERROR("Credential from uid %u", auth_uid);
+ rc = SLURM_ERROR;
+ }
+ *uid = auth_uid;
+ }
g_slurm_auth_destroy(auth_cred);
return rc;
}
@@ -706,7 +717,7 @@ static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
pmixp_base_hdr_t bhdr;
init_msg = xmalloc(sizeof(*init_msg));
- rc = _auth_cred_create(buf_init);
+ rc = _auth_cred_create(buf_init, dconn->uid);
if (rc) {
free_buf(init_msg->buf_ptr);
xfree(init_msg);
@@ -1099,6 +1110,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
Buf buf_msg;
int rc;
char *nodename = NULL;
+ uid_t uid = SLURM_AUTH_NOBODY;
if (!hdr->ext_flag) {
nodename = pmixp_info_job_host(hdr->nodeid);
@@ -1122,7 +1134,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
return;
}
/* Unpack and verify the auth credential */
- rc = _auth_cred_verify(buf_msg);
+ rc = _auth_cred_verify(buf_msg, &uid);
free_buf(buf_msg);
if (rc) {
close(fd);
@@ -1146,6 +1158,9 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
xfree(nodename);
return;
}
+
+ dconn->uid = uid;
+
new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
pmixp_dconn_engine(dconn),
_direct_new_msg_conn,
diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c
index 69bbf3ceca..ba755552a7 100644
--- a/src/plugins/mpi/pmix/pmixp_utils.c
+++ b/src/plugins/mpi/pmix/pmixp_utils.c
@@ -404,6 +404,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address,
msg.forward.timeout = timeout;
msg.forward.cnt = 0;
msg.forward.nodelist = NULL;
+ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout);
if (!ret_list) {
/* This should never happen (when this was
diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c
index 6bc70c8458..96b328da8f 100644
--- a/src/sattach/sattach.c
+++ b/src/sattach/sattach.c
@@ -405,6 +405,7 @@ static int _attach_to_tasks(uint32_t jobid,
reattach_msg.io_port = io_ports;
reattach_msg.cred = fake_cred;
+ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY);
msg.msg_type = REQUEST_REATTACH_TASKS;
msg.data = &reattach_msg;
msg.protocol_version = layout->start_protocol_ver;
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 6cdc9068eb..667856e7db 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -140,6 +140,7 @@ typedef struct agent_info {
uint16_t retry; /* if set, keep trying */
thd_t *thread_struct; /* thread structures */
bool get_reply; /* flag if reply expected */
+ uid_t r_uid; /* receiver UID */
slurm_msg_type_t msg_type; /* RPC to be issued */
void **msg_args_pptr; /* RPC data to be used */
uint16_t protocol_version; /* if set, use this version */
@@ -153,6 +154,7 @@ typedef struct task_info {
uint32_t *threads_active_ptr; /* currently active thread ptr */
thd_t *thread_struct_ptr; /* thread structures ptr */
bool get_reply; /* flag if reply expected */
+ uid_t r_uid; /* receiver UID */
slurm_msg_type_t msg_type; /* RPC to be issued */
void *msg_args_ptr; /* ptr to RPC data to be used */
uint16_t protocol_version; /* if set, use this version */
@@ -409,6 +411,11 @@ static int _valid_agent_arg(agent_arg_t *agent_arg_ptr)
__func__, agent_arg_ptr->node_count, hostlist_cnt);
return SLURM_FAILURE; /* no messages to be sent */
}
+ if (!agent_arg_ptr->r_uid_set) {
+ error("%s: r_uid not set for message:%u ",
+ __func__, agent_arg_ptr->msg_type);
+ return SLURM_ERROR;
+ }
return SLURM_SUCCESS;
}
@@ -431,6 +438,7 @@ static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr)
thread_ptr = xmalloc(agent_info_ptr->thread_count * sizeof(thd_t));
memset(thread_ptr, 0, (agent_info_ptr->thread_count * sizeof(thd_t)));
agent_info_ptr->thread_struct = thread_ptr;
+ agent_info_ptr->r_uid = agent_arg_ptr->r_uid;
agent_info_ptr->msg_type = agent_arg_ptr->msg_type;
agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args;
agent_info_ptr->protocol_version = agent_arg_ptr->protocol_version;
@@ -514,6 +522,7 @@ static task_info_t *_make_task_data(agent_info_t *agent_info_ptr, int inx)
task_info_ptr->threads_active_ptr= &agent_info_ptr->threads_active;
task_info_ptr->thread_struct_ptr = &agent_info_ptr->thread_struct[inx];
task_info_ptr->get_reply = agent_info_ptr->get_reply;
+ task_info_ptr->r_uid = agent_info_ptr->r_uid;
task_info_ptr->msg_type = agent_info_ptr->msg_type;
task_info_ptr->msg_args_ptr = *agent_info_ptr->msg_args_pptr;
task_info_ptr->protocol_version = agent_info_ptr->protocol_version;
@@ -905,6 +914,7 @@ static void *_thread_per_group_rpc(void *args)
msg.msg_type = msg_type;
msg.data = task_ptr->msg_args_ptr;
+ slurm_msg_set_r_uid(&msg, task_ptr->r_uid);
#if 0
info("sending message type %u to %s", msg_type, thread_ptr->nodelist);
#endif
@@ -1237,6 +1247,8 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count)
agent_arg_ptr->msg_args = *(agent_info_ptr->msg_args_pptr);
*(agent_info_ptr->msg_args_pptr) = NULL;
+ set_agent_arg_r_uid(agent_arg_ptr, agent_info_ptr->r_uid);
+
j = 0;
for (i = 0; i < agent_info_ptr->thread_count; i++) {
if (!thread_ptr[i].ret_list) {
@@ -1987,3 +1999,10 @@ extern int retry_list_size(void)
return 0;
return list_count(retry_list);
}
+
+/* Store r_uid as the receiver UID of agent_arg_ptr and flag it as set */
+extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid)
+{
+ agent_arg_ptr->r_uid = r_uid;
+ agent_arg_ptr->r_uid_set = true; /* checked by _valid_agent_arg() */
+}
diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h
index 64ba75ea30..3a92e9282b 100644
--- a/src/slurmctld/agent.h
+++ b/src/slurmctld/agent.h
@@ -54,6 +54,8 @@ typedef struct agent_arg {
uint32_t node_count; /* number of nodes to communicate
* with */
uint16_t retry; /* if set, keep trying */
+ uid_t r_uid; /* receiver UID */
+ bool r_uid_set; /* True if receiver UID set*/
slurm_addr_t *addr; /* if set will send to this
addr not hostlist */
hostlist_t hostlist; /* hostlist containing the
@@ -108,4 +110,7 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type);
/* Return length of agent's retry_list */
extern int retry_list_size(void);
+/* Store r_uid as the receiver UID of agent_arg_ptr and flag it as set */
+extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid);
+
#endif /* !_AGENT_H */
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index 24ddcde10f..9b98594c9c 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -304,7 +304,7 @@ static void *_background_rpc_mgr(void *no_data)
int sockfd;
slurm_addr_t cli_addr;
slurm_msg_t msg;
- int error_code;
+ int error_code = SLURM_ERROR;
char* node_addr = NULL;
/* Read configuration only */
@@ -358,8 +358,9 @@ static void *_background_rpc_mgr(void *no_data)
slurm_msg_t_init(&msg);
if (slurm_receive_msg(newsockfd, &msg, 0) != 0)
error("slurm_receive_msg: %m");
+ else
+ error_code = _background_process_msg(&msg);
- error_code = _background_process_msg(&msg);
if ((error_code == SLURM_SUCCESS) &&
(msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
(slurmctld_config.shutdown_time == 0))
@@ -381,6 +382,10 @@ static int _background_process_msg(slurm_msg_t * msg)
{
int error_code = SLURM_SUCCESS;
+ if (!msg->auth_uid_set)
+ fatal("%s: received message without previously validated auth",
+ __func__);
+
if (msg->msg_type != REQUEST_PING) {
bool super_user = false;
char *auth_info = slurm_get_auth_info();
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 478f150826..3fbcb29f0e 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1681,6 +1681,7 @@ static void _queue_reboot_msg(void)
reboot_agent_args->hostlist);
debug("Queuing reboot request for nodes %s", host_str);
xfree(host_str);
+ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reboot_agent_args);
last_node_update = now;
schedule_node_save();
diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index 7d94e938bb..8b464847b7 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -352,6 +352,8 @@ static int _open_controller_conn(slurmdb_cluster_rec_t *cluster, bool locked)
persist_conn->rem_port = cluster->control_port;
}
+ persist_conn->r_uid = SLURM_AUTH_UID_ANY;
+
rc = slurm_persist_conn_open(persist_conn);
if (rc != SLURM_SUCCESS) {
if (_comm_fail_log(cluster)) {
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index e5ad96cdbc..781988ec45 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -5643,6 +5643,7 @@ _signal_batch_job(struct job_record *job_ptr, uint16_t signal, uint16_t flags)
signal_tasks_msg->signal = signal;
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -13659,8 +13660,7 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
- slurm_msg_t_init(&resp_msg);
- resp_msg.protocol_version = msg->protocol_version;
+ response_init(&resp_msg, msg);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
@@ -13670,7 +13670,6 @@ reply:
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
}
- resp_msg.conn = msg->conn;
slurm_send_node_msg(msg->conn_fd, &resp_msg);
if (resp_array_msg) {
@@ -13761,6 +13760,7 @@ static void _send_job_kill(struct job_record *job_ptr)
}
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -14161,6 +14161,7 @@ abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, char *node_name)
agent_info->msg_type = REQUEST_ABORT_JOB;
agent_info->msg_args = kill_req;
+ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_info);
}
@@ -14209,6 +14210,7 @@ kill_job_on_node(uint32_t job_id, struct job_record *job_ptr,
agent_info->msg_type = REQUEST_TERMINATE_JOB;
agent_info->msg_args = kill_req;
+ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_info);
}
@@ -15199,6 +15201,7 @@ static void _signal_job(struct job_record *job_ptr, int signal, uint16_t flags)
}
agent_args->msg_args = signal_job_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -15279,6 +15282,7 @@ static void _suspend_job(struct job_record *job_ptr, uint16_t op,
}
agent_args->msg_args = sus_ptr;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -15629,6 +15633,7 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid,
resp_msg.msg_type = RESPONSE_SLURM_RC;
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
}
return rc;
@@ -15784,6 +15789,7 @@ extern int job_suspend2(suspend_msg_t *sus_ptr, uid_t uid,
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
}
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
if (resp_array_msg) {
@@ -16509,6 +16515,7 @@ reply: FREE_NULL_LIST(top_job_list);
resp_msg.msg_type = RESPONSE_SLURM_RC;
rc_msg.return_code = rc;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
slurm_send_node_msg(conn_fd, &resp_msg);
}
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index a1a7372359..91d77644fd 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -2701,6 +2701,7 @@ extern void launch_job(struct job_record *job_ptr)
agent_arg_ptr->hostlist = hostlist_create(launch_job_ptr->batch_host);
agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH;
agent_arg_ptr->msg_args = (void *) launch_msg_ptr;
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
/* Launch the RPC via agent */
agent_queue_request(agent_arg_ptr);
@@ -4176,6 +4177,7 @@ extern int reboot_job_nodes(struct job_record *job_ptr)
node_ptr->last_response = now + slurmctld_conf.resume_timeout;
}
FREE_NULL_BITMAP(boot_node_bitmap);
+ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reboot_agent_args);
job_ptr->details->prolog_running++;
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 4177cbe9fc..fe5fec4e62 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -3658,11 +3658,11 @@ void msg_to_slurmd (slurm_msg_type_t msg_type)
xfree (kill_agent_args);
} else {
debug ("Spawning agent msg_type=%d", msg_type);
+ set_agent_arg_r_uid(kill_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(kill_agent_args);
}
}
-
/* make_node_alloc - flag specified node as allocated to a job
* IN node_ptr - pointer to node being allocated
* IN job_ptr - pointer to job that is starting
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index e94548ee8a..cdbb4eda78 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -623,6 +623,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout,
}
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -2938,6 +2939,7 @@ extern void launch_prolog(struct job_record *job_ptr)
select_g_step_start(build_extern_step(job_ptr));
/* Launch the RPC via agent */
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_arg_ptr);
}
@@ -4057,6 +4059,7 @@ extern void re_kill_job(struct job_record *job_ptr)
last_job_id = job_ptr->job_id;
hostlist_destroy(kill_hostlist);
agent_args->msg_args = kill_job;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c
index 4aae28f01e..f7baf70fae 100644
--- a/src/slurmctld/ping_nodes.c
+++ b/src/slurmctld/ping_nodes.c
@@ -348,6 +348,7 @@ void ping_nodes (void)
debug("Spawning ping agent for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(ping_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(ping_agent_args);
}
@@ -362,6 +363,7 @@ void ping_nodes (void)
host_str, reg_agent_args->node_count);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(reg_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(reg_agent_args);
}
@@ -522,6 +524,7 @@ extern void run_health_check(void)
debug("Spawning health check agent for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(check_agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(check_agent_args);
}
}
@@ -581,6 +584,7 @@ extern void update_nodes_acct_gather_data(void)
info("Updating acct_gather data for %s", host_str);
xfree(host_str);
ping_begin();
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
}
}
diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c
index 570cbe219e..da4ad39f83 100644
--- a/src/slurmctld/srun_comm.c
+++ b/src/slurmctld/srun_comm.c
@@ -57,7 +57,7 @@
*/
static void _srun_agent_launch(slurm_addr_t *addr, char *host,
slurm_msg_type_t type, void *msg_args,
- uint16_t protocol_version)
+ uid_t r_uid, uint16_t protocol_version)
{
agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t));
@@ -67,6 +67,7 @@ static void _srun_agent_launch(slurm_addr_t *addr, char *host,
agent_args->hostlist = hostlist_create(host);
agent_args->msg_type = type;
agent_args->msg_args = msg_args;
+ set_agent_arg_r_uid(agent_args, r_uid);
agent_args->protocol_version = protocol_version;
agent_queue_request(agent_args);
@@ -146,6 +147,7 @@ extern void srun_allocate (uint32_t job_id)
msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL);
_srun_agent_launch(addr, job_ptr->alloc_node,
RESPONSE_RESOURCE_ALLOCATION, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
} else if (_pending_pack_jobs(job_ptr)) {
return;
@@ -169,6 +171,7 @@ extern void srun_allocate (uint32_t job_id)
list_iterator_destroy(iter);
_srun_agent_launch(addr, job_ptr->alloc_node,
RESPONSE_JOB_PACK_ALLOCATION, job_resp_list,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
} else {
error("%s: Can not find pack job leader %u",
@@ -194,7 +197,7 @@ extern void srun_allocate_abort(struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_JOB_COMPLETE,
- msg_arg,
+ msg_arg, job_ptr->user_id,
job_ptr->start_protocol_ver);
}
}
@@ -248,7 +251,8 @@ extern void srun_node_fail (uint32_t job_id, char *node_name)
msg_arg->step_id = step_ptr->step_id;
msg_arg->nodelist = xstrdup(node_name);
_srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
list_iterator_destroy(step_iterator);
@@ -260,7 +264,8 @@ extern void srun_node_fail (uint32_t job_id, char *node_name)
msg_arg->step_id = NO_VAL;
msg_arg->nodelist = xstrdup(node_name);
_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
}
}
@@ -294,7 +299,7 @@ extern void srun_ping (void)
msg_arg->job_id = job_ptr->job_id;
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
- SRUN_PING, msg_arg,
+ SRUN_PING, msg_arg, job_ptr->user_id,
job_ptr->start_protocol_ver);
}
}
@@ -325,6 +330,7 @@ extern void srun_step_timeout(struct step_record *step_ptr, time_t timeout_val)
msg_arg->step_id = step_ptr->step_id;
msg_arg->timeout = timeout_val;
_srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT, msg_arg,
+ step_ptr->job_ptr->user_id,
step_ptr->start_protocol_ver);
}
@@ -351,7 +357,8 @@ extern void srun_timeout (struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
msg_arg->timeout = job_ptr->end_time;
_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
}
@@ -381,7 +388,8 @@ extern int srun_user_message(struct job_record *job_ptr, char *msg)
msg_arg->job_id = job_ptr->job_id;
msg_arg->msg = xstrdup(msg);
_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
- msg_arg, job_ptr->start_protocol_ver);
+ msg_arg, job_ptr->user_id,
+ job_ptr->start_protocol_ver);
return SLURM_SUCCESS;
} else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) {
#ifndef HAVE_FRONT_END
@@ -420,6 +428,7 @@ extern int srun_user_message(struct job_record *job_ptr, char *msg)
agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
agent_arg_ptr->msg_args = (void *) notify_msg_ptr;
/* Launch the RPC via agent */
+ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_arg_ptr);
return SLURM_SUCCESS;
}
@@ -447,6 +456,7 @@ extern void srun_job_complete (struct job_record *job_ptr)
msg_arg->step_id = NO_VAL;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_JOB_COMPLETE, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
}
@@ -481,6 +491,7 @@ extern bool srun_job_suspend (struct job_record *job_ptr, uint16_t op)
msg_arg->op = op;
_srun_agent_launch(addr, job_ptr->alloc_node,
SRUN_REQUEST_SUSPEND, msg_arg,
+ job_ptr->user_id,
job_ptr->start_protocol_ver);
msg_sent = true;
}
@@ -504,7 +515,8 @@ extern void srun_step_complete (struct step_record *step_ptr)
msg_arg->job_id = step_ptr->job_ptr->job_id;
msg_arg->step_id = step_ptr->step_id;
_srun_agent_launch(addr, step_ptr->host, SRUN_JOB_COMPLETE,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -529,7 +541,8 @@ extern void srun_step_missing (struct step_record *step_ptr,
msg_arg->step_id = step_ptr->step_id;
msg_arg->nodelist = xstrdup(node_list);
_srun_agent_launch(addr, step_ptr->host, SRUN_STEP_MISSING,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -553,7 +566,8 @@ extern void srun_step_signal (struct step_record *step_ptr, uint16_t signal)
msg_arg->job_step_id = step_ptr->step_id;
msg_arg->signal = signal;
_srun_agent_launch(addr, step_ptr->host, SRUN_STEP_SIGNAL,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
}
}
@@ -584,7 +598,8 @@ extern void srun_exec(struct step_record *step_ptr, char **argv)
for (i=0; i<cnt ; i++)
msg_arg->argv[i] = xstrdup(argv[i]);
_srun_agent_launch(addr, step_ptr->host, SRUN_EXEC,
- msg_arg, step_ptr->start_protocol_ver);
+ msg_arg, step_ptr->job_ptr->user_id,
+ step_ptr->start_protocol_ver);
} else {
error("srun_exec %u.%u lacks communication channel",
step_ptr->job_ptr->job_id, step_ptr->step_id);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index cfda0f805e..947511d7d6 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -692,6 +692,7 @@ void signal_step_tasks(struct step_record *step_ptr, uint16_t signal,
}
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -737,6 +738,7 @@ void signal_step_tasks_on_node(char* node_name, struct step_record *step_ptr,
signal_tasks_msg->job_step_id = step_ptr->step_id;
signal_tasks_msg->signal = signal;
agent_args->msg_args = signal_tasks_msg;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
@@ -4371,6 +4373,7 @@ static void _signal_step_timelimit(struct job_record *job_ptr,
}
agent_args->msg_args = kill_step;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 8233908682..db0e4e9bf2 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -472,7 +472,7 @@ rwfail:
static int
_send_slurmstepd_init(int fd, int type, void *req,
- slurm_addr_t *cli, slurm_addr_t *self,
+ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
hostset_t step_hset, uint16_t protocol_version)
{
int len = 0;
@@ -581,6 +581,7 @@ _send_slurmstepd_init(int fd, int type, void *req,
safe_write(fd, get_buf_data(buffer), len);
free_buf(buffer);
buffer = NULL;
+ safe_write(fd, &cli_uid, sizeof(uid_t));
/* send self address over to slurmstepd */
if (self) {
@@ -660,7 +661,7 @@ rwfail:
*/
static int
_forkexec_slurmstepd(uint16_t type, void *req,
- slurm_addr_t *cli, slurm_addr_t *self,
+ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
const hostset_t step_hset, uint16_t protocol_version)
{
pid_t pid;
@@ -702,7 +703,7 @@ _forkexec_slurmstepd(uint16_t type, void *req,
error("Unable to close write to_slurmd in parent: %m");
if ((rc = _send_slurmstepd_init(to_stepd[1], type,
- req, cli, self,
+ req, cli, cli_uid, self,
step_hset,
protocol_version)) != 0) {
error("Unable to init slurmstepd");
@@ -1548,8 +1549,9 @@ _rpc_launch_tasks(slurm_msg_t *msg)
}
debug3("_rpc_launch_tasks: call to _forkexec_slurmstepd");
- errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli, &self,
- step_hset, msg->protocol_version);
+ errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli,
+ msg->auth_uid, &self, step_hset,
+ msg->protocol_version);
debug3("_rpc_launch_tasks: return from _forkexec_slurmstepd");
_launch_complete_add(req->job_id);
@@ -2195,7 +2197,7 @@ static int _spawn_prolog_stepd(slurm_msg_t *msg)
debug3("%s: call to _forkexec_slurmstepd", __func__);
rc = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)launch_req,
- cli, &self, step_hset,
+ cli, msg->auth_uid, &self, step_hset,
msg->protocol_version);
debug3("%s: return from _forkexec_slurmstepd %d",
__func__, rc);
@@ -2508,8 +2510,9 @@ _rpc_batch_job(slurm_msg_t *msg, bool new_msg)
info("Launching batch job %u for UID %u", req->job_id, req->uid);
debug3("_rpc_batch_job: call to _forkexec_slurmstepd");
- rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, NULL,
- (hostset_t)NULL, SLURM_PROTOCOL_VERSION);
+ rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli,
+ msg->auth_uid, NULL, (hostset_t)NULL,
+ SLURM_PROTOCOL_VERSION);
debug3("_rpc_batch_job: return from _forkexec_slurmstepd: %d", rc);
slurm_mutex_unlock(&launch_mutex);
@@ -4479,7 +4482,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg)
/* Following call fills in gtids and local_pids when successful. */
rc = stepd_attach(fd, protocol_version, &ioaddr,
- &resp_msg.address, job_cred_sig, resp);
+ &resp_msg.address, job_cred_sig, msg->auth_uid, resp);
if (rc != SLURM_SUCCESS) {
debug2("stepd_attach call failed");
goto done2;
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 649324e42c..26107ee852 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -535,7 +535,11 @@ _service_connection(void *arg)
/* if this fails we need to make sure the nodes we forward
to are taken care of and sent back. This way the control
also has a better idea what happened to us */
- slurm_send_rc_msg(msg, rc);
+ if (msg->auth_uid_set)
+ slurm_send_rc_msg(msg, rc);
+ else
+ debug("%s: incomplete message", __func__);
+
goto cleanup;
}
debug2("got this type of message %d", msg->msg_type);
diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c
index 730e7fc9f0..047225efa6 100644
--- a/src/slurmd/slurmstepd/io.c
+++ b/src/slurmd/slurmstepd/io.c
@@ -1938,6 +1938,7 @@ _user_managed_io_connect(srun_info_t *srun, uint32_t gtid)
slurm_msg_t_init(&msg);
msg.protocol_version = srun->protocol_version;
msg.msg_type = TASK_USER_MANAGED_IO_STREAM;
+ slurm_msg_set_r_uid(&msg, srun->uid);
msg.data = &user_io_msg;
user_io_msg.task_id = gtid;
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 5a095c0089..b1e3d4d796 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -158,7 +158,7 @@ typedef struct kill_thread {
static bool _access(const char *path, int modes, uid_t uid,
int ngids, gid_t *gids);
static void _send_launch_failure(launch_tasks_request_msg_t *,
- slurm_addr_t *, int, uint16_t);
+ slurm_addr_t *, uid_t, int, uint16_t);
static int _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized);
static int _become_user(stepd_step_rec_t *job, struct priv_state *ps);
static void _set_prio_process (stepd_step_rec_t *job);
@@ -196,7 +196,8 @@ static stepd_step_rec_t *reattach_job;
*/
extern stepd_step_rec_t *
mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
- slurm_addr_t *self, uint16_t protocol_version)
+ uid_t cli_uid, slurm_addr_t *self,
+ uint16_t protocol_version)
{
stepd_step_rec_t *job = NULL;
@@ -206,7 +207,8 @@ mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
reset in _send_launch_failure.
*/
int fail = errno;
- _send_launch_failure(msg, cli, errno, protocol_version);
+ _send_launch_failure(msg, cli, cli_uid, errno,
+ protocol_version);
errno = fail;
return NULL;
}
@@ -679,6 +681,7 @@ _send_exit_msg(stepd_step_rec_t *job, uint32_t *tid, int n, int status)
/* This should always be set to something else we have a bug. */
xassert(srun->protocol_version);
resp.protocol_version = srun->protocol_version;
+ slurm_msg_set_r_uid(&resp, srun->uid);
if (_send_srun_resp_msg(&resp, job->nnodes) != SLURM_SUCCESS)
error("Failed to send MESSAGE_TASK_EXIT: %m");
@@ -774,6 +777,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last)
}
/*********************************************/
slurm_msg_t_init(&req);
+ slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id);
req.msg_type = REQUEST_STEP_COMPLETE;
req.data = &msg;
req.address = step_complete.parent_addr;
@@ -2330,8 +2334,8 @@ extern int stepd_drain_node(char *reason)
}
static void
-_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc,
- uint16_t protocol_version)
+_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
+ uid_t cli_uid, int rc, uint16_t protocol_version)
{
slurm_msg_t resp_msg;
launch_tasks_response_msg_t resp;
@@ -2367,6 +2371,7 @@ _send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc,
resp_msg.data = &resp;
resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
resp_msg.protocol_version = protocol_version;
+ slurm_msg_set_r_uid(&resp_msg, cli_uid);
resp.job_id = msg->job_id;
resp.step_id = msg->job_step_id;
@@ -2395,6 +2400,7 @@ _send_launch_resp(stepd_step_rec_t *job, int rc)
slurm_msg_t_init(&resp_msg);
resp_msg.address = srun->resp_addr;
+ slurm_msg_set_r_uid(&resp_msg, srun->uid);
resp_msg.protocol_version = srun->protocol_version;
resp_msg.data = &resp;
resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
diff --git a/src/slurmd/slurmstepd/mgr.h b/src/slurmd/slurmstepd/mgr.h
index 645d5aea20..a5f81106d5 100644
--- a/src/slurmd/slurmstepd/mgr.h
+++ b/src/slurmd/slurmstepd/mgr.h
@@ -53,7 +53,7 @@ void batch_finish(stepd_step_rec_t *job, int rc);
* Initialize a stepd_step_rec_t structure for a launch tasks
*/
stepd_step_rec_t *mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg,
- slurm_addr_t *client,
+ slurm_addr_t *cli, uid_t cli_uid,
slurm_addr_t *self,
uint16_t protocol_version);
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 110a40d695..b521ba1e38 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -1110,6 +1110,7 @@ _handle_attach(int fd, stepd_step_rec_t *job, uid_t uid)
safe_read(fd, &srun->ioaddr, sizeof(slurm_addr_t));
safe_read(fd, &srun->resp_addr, sizeof(slurm_addr_t));
safe_read(fd, srun->key, SLURM_IO_KEY_SIZE);
+ safe_read(fd, &srun->uid, sizeof(uid_t));
safe_read(fd, &protocol_version, sizeof(int));
if (!protocol_version)
diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c
index b589432f9d..31b03a94ab 100644
--- a/src/slurmd/slurmstepd/slurmstepd.c
+++ b/src/slurmd/slurmstepd/slurmstepd.c
@@ -71,15 +71,16 @@
#include "src/slurmd/slurmstepd/slurmstepd.h"
#include "src/slurmd/slurmstepd/slurmstepd_job.h"
-static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli,
+static int _init_from_slurmd(int sock, char **argv,
+ slurm_addr_t **_cli, uid_t *_cli_uid,
slurm_addr_t **_self, slurm_msg_t **_msg);
static void _dump_user_env(void);
static void _send_ok_to_slurmd(int sock);
static void _send_fail_to_slurmd(int sock);
static void _got_ack_from_slurmd(int);
-static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, slurm_addr_t *self,
- slurm_msg_t *msg);
+static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, uid_t cli_uid,
+ slurm_addr_t *self, slurm_msg_t *msg);
#ifdef MEMORY_LEAK_DEBUG
static void _step_cleanup(stepd_step_rec_t *job, slurm_msg_t *msg, int rc);
#endif
@@ -102,6 +103,7 @@ int
main (int argc, char **argv)
{
slurm_addr_t *cli;
+ uid_t cli_uid;
slurm_addr_t *self;
slurm_msg_t *msg;
stepd_step_rec_t *job;
@@ -122,11 +124,11 @@ main (int argc, char **argv)
fatal( "failed to initialize authentication plugin" );
/* Receive job parameters from the slurmd */
- _init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg);
+ _init_from_slurmd(STDIN_FILENO, argv, &cli, &cli_uid, &self, &msg);
/* Create the stepd_step_rec_t, mostly from info in a
* launch_tasks_request_msg_t or a batch_job_launch_msg_t */
- if (!(job = _step_setup(cli, self, msg))) {
+ if (!(job = _step_setup(cli, cli_uid, self, msg))) {
_send_fail_to_slurmd(STDOUT_FILENO);
rc = SLURM_FAILURE;
goto ending;
@@ -460,7 +462,8 @@ static void _set_job_log_prefix(uint32_t jobid, uint32_t stepid)
*/
static int
_init_from_slurmd(int sock, char **argv,
- slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg)
+ slurm_addr_t **_cli, uid_t *_cli_uid, slurm_addr_t **_self,
+ slurm_msg_t **_msg)
{
char *incoming_buffer = NULL;
Buf buffer;
@@ -468,6 +471,7 @@ _init_from_slurmd(int sock, char **argv,
int len;
uint16_t proto;
slurm_addr_t *cli = NULL;
+ uid_t cli_uid;
slurm_addr_t *self = NULL;
slurm_msg_t *msg = NULL;
uint16_t port;
@@ -521,6 +525,7 @@ _init_from_slurmd(int sock, char **argv,
if (slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR)
fatal("slurmstepd: problem with unpack of slurmd_conf");
free_buf(buffer);
+ safe_read(sock, &cli_uid, sizeof(uid_t));
/* receive self from slurmd */
safe_read(sock, &len, sizeof(int));
@@ -601,6 +606,7 @@ _init_from_slurmd(int sock, char **argv,
msg->protocol_version = proto;
*_cli = cli;
+ *_cli_uid = cli_uid;
*_self = self;
*_msg = msg;
@@ -612,7 +618,8 @@ rwfail:
}
static stepd_step_rec_t *
-_step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
+_step_setup(slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
+ slurm_msg_t *msg)
{
stepd_step_rec_t *job = NULL;
@@ -623,7 +630,7 @@ _step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
break;
case REQUEST_LAUNCH_TASKS:
debug2("setup for a launch_task");
- job = mgr_launch_tasks_setup(msg->data, cli, self,
+ job = mgr_launch_tasks_setup(msg->data, cli, cli_uid, self,
msg->protocol_version);
break;
default:
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index 6a02047084..9a7fa951b2 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -376,7 +376,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
memset(&io_addr, 0, sizeof(slurm_addr_t));
}
- srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
+ srun = srun_info_create(msg->cred, &resp_addr, &io_addr, job->uid,
protocol_version);
job->profile = msg->profile;
@@ -549,7 +549,7 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
get_cred_gres(msg->cred, conf->node_name,
&job->job_gres_list, &job->step_gres_list);
- srun = srun_info_create(NULL, NULL, NULL, NO_VAL16);
+ srun = srun_info_create(NULL, NULL, NULL, job->uid, NO_VAL16);
list_append(job->sruns, (void *) srun);
@@ -632,7 +632,7 @@ stepd_step_rec_destroy(stepd_step_rec_t *job)
extern srun_info_t *
srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
- slurm_addr_t *ioaddr, uint16_t protocol_version)
+ slurm_addr_t *ioaddr, uid_t uid, uint16_t protocol_version)
{
char *data = NULL;
uint32_t len = 0;
@@ -643,6 +643,7 @@ srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
if (!protocol_version || (protocol_version == NO_VAL16))
protocol_version = SLURM_PROTOCOL_VERSION;
srun->protocol_version = protocol_version;
+ srun->uid = uid;
/*
* If no credential was provided, return the empty
* srun info object. (This is used, for example, when
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h
index 3bf972125a..db4b3c94be 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.h
+++ b/src/slurmd/slurmstepd/slurmstepd_job.h
@@ -68,6 +68,7 @@ typedef struct {
slurm_addr_t ioaddr; /* Address to connect on for normal I/O.
Spawn IO uses messages to the normal
resp_addr. */
+ uid_t uid; /* user id for job */
uint16_t protocol_version; /* protocol_version of the srun */
} srun_info_t;
@@ -253,7 +254,8 @@ stepd_step_rec_t * batch_stepd_step_rec_create(batch_job_launch_msg_t *msg);
void stepd_step_rec_destroy(stepd_step_rec_t *job);
srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *respaddr,
- slurm_addr_t *ioaddr, uint16_t protocol_version);
+ slurm_addr_t *ioaddr, uid_t uid,
+ uint16_t protocol_version);
void srun_info_destroy(srun_info_t *srun);
diff --git a/src/slurmd/slurmstepd/x11_forwarding.c b/src/slurmd/slurmstepd/x11_forwarding.c
index 92398a9f23..f09018572a 100644
--- a/src/slurmd/slurmstepd/x11_forwarding.c
+++ b/src/slurmd/slurmstepd/x11_forwarding.c
@@ -82,6 +82,8 @@ static int x11_display = 0;
void *_handle_channel(void *x);
void *_keepalive_engine(void *x);
void *_accept_engine(void *x);
+/* Target UID */
+static uid_t job_uid;
/*
* libssh2 has some quirks with the mixed use of blocking vs. non-blocking
@@ -197,6 +199,8 @@ extern int setup_x11_forward(stepd_step_rec_t *job, int *display)
xsignal(SIGTERM, _shutdown_x11);
xsignal_unblock(sig_array);
+ job_uid = job->uid;
+
if (!(home = _get_home(job->uid))) {
error("could not find HOME in environment");
return SLURM_ERROR;
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index 03beae60fd..dede47c2be 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -616,6 +616,7 @@ extern int read_slurmdbd_conf(void)
if (!slurmdbd_conf->purge_usage)
slurmdbd_conf->purge_usage = NO_VAL;
+ slurm_conf.last_update = time(NULL);
slurm_mutex_unlock(&conf_mutex);
return SLURM_SUCCESS;
}
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index ae2f27d617..a43b279f41 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -816,6 +816,7 @@ static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
} else {
slurm_msg_t out_msg;
slurm_msg_t_init(&out_msg);
+ slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY);
out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
out_msg.protocol_version = cluster_rec->rpc_version;
--
2.35.3
From d9a32698c209d183a9c18166201275f5b2dcc757 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:39:56 +0200
Subject: [PATCH 02/31] Convert slurm_conf.slurmd_user_id ->
slurm_get_slurmd_user_id()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/api/signal.c | 4 ++--
src/plugins/mpi/pmix/pmixp_dconn.c | 2 +-
src/plugins/mpi/pmix/pmixp_server.c | 2 +-
src/plugins/mpi/pmix/pmixp_utils.c | 2 +-
src/slurmd/slurmstepd/mgr.c | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/api/signal.c b/src/api/signal.c
index d829db36a5..c8e85547e2 100644
--- a/src/api/signal.c
+++ b/src/api/signal.c
@@ -101,7 +101,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t
rpc.flags = KILL_JOB_BATCH;
slurm_msg_t_init(&msg);
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
msg.msg_type = REQUEST_SIGNAL_TASKS;
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
@@ -156,7 +156,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t
slurm_msg_t_init(&msg);
msg.msg_type = REQUEST_TERMINATE_TASKS;
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
msg.data = &rpc;
if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c
index eb8888a32c..becdae85f6 100644
--- a/src/plugins/mpi/pmix/pmixp_dconn.c
+++ b/src/plugins/mpi/pmix/pmixp_dconn.c
@@ -79,7 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
_pmixp_dconn_conns[i].nodeid = i;
_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
- _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id;
+ _pmixp_dconn_conns[i].uid = slurm_get_slurmd_user_id();
}
return SLURM_SUCCESS;
}
diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c
index 294fe5eeac..e77f0f2034 100644
--- a/src/plugins/mpi/pmix/pmixp_server.c
+++ b/src/plugins/mpi/pmix/pmixp_server.c
@@ -543,7 +543,7 @@ static int _auth_cred_verify(Buf buf, uid_t *uid)
} else {
uid_t auth_uid;
auth_uid = g_slurm_auth_get_uid(auth_cred);
- if ((auth_uid != slurm_conf.slurmd_user_id) &&
+ if ((auth_uid != slurm_get_slurmd_user_id()) &&
(auth_uid != _pmixp_job_info.uid)) {
PMIXP_ERROR("Credential from uid %u", auth_uid);
rc = SLURM_ERROR;
diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c
index ba755552a7..1812062715 100644
--- a/src/plugins/mpi/pmix/pmixp_utils.c
+++ b/src/plugins/mpi/pmix/pmixp_utils.c
@@ -404,7 +404,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address,
msg.forward.timeout = timeout;
msg.forward.cnt = 0;
msg.forward.nodelist = NULL;
- slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id());
ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout);
if (!ret_list) {
/* This should never happen (when this was
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index b1e3d4d796..e2d5fac92b 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -777,7 +777,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last)
}
/*********************************************/
slurm_msg_t_init(&req);
- slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id);
+ slurm_msg_set_r_uid(&req, slurm_get_slurmd_user_id());
req.msg_type = REQUEST_STEP_COMPLETE;
req.data = &msg;
req.address = step_complete.parent_addr;
--
2.35.3
From a762c179ad3ed41aae9c10d2602c7c91d5fd1b98 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:44:04 +0200
Subject: [PATCH 03/31] Convert slurm_conf.slurm_user_id ->
slurm_get_slurm_user_id()
Signed-off-by: Egbert Eich <eich@suse.com>
# Conflicts:
# src/common/slurm_protocol_api.c
# src/slurmctld/backup.c
# src/slurmctld/controller.c
---
src/api/reconfigure.c | 2 +-
src/common/slurm_protocol_api.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c
index a53172e743..a62a1ddc60 100644
--- a/src/api/reconfigure.c
+++ b/src/api/reconfigure.c
@@ -160,7 +160,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req)
if ((fd = slurm_open_controller_conn_spec(dest,working_cluster_rec)) <0)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
- slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(req, slurm_get_slurm_user_id());
if (slurm_send_node_msg(fd, req) < 0) {
slurm_shutdown_msg_conn(fd);
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 01ea2648ba..f297bf512d 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4705,7 +4705,7 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req,
goto cleanup;
}
- slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id);
+ slurm_msg_set_r_uid(req, slurm_get_slurm_user_id());
if ((rc = slurm_send_node_msg(fd, req)) < 0) {
rc = SLURM_ERROR;
--
2.35.3
From dc3f4f2a5e57d0e6b84bac74267a25c2ee1e0866 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:46:37 +0200
Subject: [PATCH 04/31] Convert slurm_conf.last_update ->
slurmctld_conf.last_update
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 ++--
src/slurmdbd/read_config.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index f297bf512d..ac4a05fe7c 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -893,10 +893,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
static time_t config_update = (time_t) -1;
static bool block_null_hash = true;
- if (config_update != slurm_conf.last_update) {
+ if (config_update != slurmctld_conf.last_update) {
block_null_hash = (xstrcasestr(slurm_conf.comm_params,
"block_null_hash"));
- config_update = slurm_conf.last_update;
+ config_update = slurmctld_conf.last_update;
}
rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len);
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index dede47c2be..f29850c8e5 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -616,7 +616,7 @@ extern int read_slurmdbd_conf(void)
if (!slurmdbd_conf->purge_usage)
slurmdbd_conf->purge_usage = NO_VAL;
- slurm_conf.last_update = time(NULL);
+ slurmctld_conf.last_update = time(NULL);
slurm_mutex_unlock(&conf_mutex);
return SLURM_SUCCESS;
}
--
2.35.3
From 8a4504becf4d031a8554d280b6599bdca1953412 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:54:56 +0200
Subject: [PATCH 05/31] Convert log_flag_hex() to local _print_data()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index ac4a05fe7c..b607076e7f 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3626,7 +3626,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
goto total_return;
}
- log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__);
+#if _DEBUG
+ _print_data (buf, buflen);
+#endif
buffer = create_buf(buf, buflen);
if (unpack_header(&header, buffer) == SLURM_ERROR) {
--
2.35.3
From 916dd96ac9a599ced5f561826e7bd3b4fb1c0d51 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:57:09 +0200
Subject: [PATCH 06/31] Convert slurm_conf.comm_params ->
slurm_get_comm_parameters()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index b607076e7f..cf5aa07181 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -894,8 +894,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
static bool block_null_hash = true;
if (config_update != slurmctld_conf.last_update) {
- block_null_hash = (xstrcasestr(slurm_conf.comm_params,
+ char * comm_parameters = slurm_get_comm_parameters();
+ block_null_hash = (xstrcasestr(comm_parameters,
"block_null_hash"));
+ xfree(comm_parameters);
config_update = slurmctld_conf.last_update;
}
--
2.35.3
From 911413c3740ac62989f74e254b4aae2e8b33fe3e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 10 May 2022 21:58:46 +0200
Subject: [PATCH 07/31] Convert slurm_conf.msg_timeout ->
slurm_get_msg_timeout()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index cf5aa07181..db258255f3 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3591,12 +3591,12 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
if (timeout <= 0) {
/* convert secs to msec */
- timeout = slurm_conf.msg_timeout * 1000;
+ timeout = slurm_get_msg_timeout() * 1000;
orig_timeout = timeout;
}
if (steps) {
if (message_timeout < 0)
- message_timeout = slurm_conf.msg_timeout * 1000;
+ message_timeout = slurm_get_msg_timeout() * 1000;
orig_timeout = (timeout -
(message_timeout*(steps-1)))/steps;
steps--;
@@ -3607,9 +3607,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
/* we compare to the orig_timeout here because that is really
* what we are going to wait for each step
*/
- if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) {
+ if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
- __func__, (slurm_conf.msg_timeout * 10),
+ __func__, (slurm_get_msg_timeout() * 10),
(timeout/1000));
} else if (orig_timeout < 1000) {
log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
--
2.35.3
From 85e1d5c256d5a65e100508d314a6175ab3df972c Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 08:36:38 +0200
Subject: [PATCH 08/31] Fix g_slurm_auth_create in _pack_composite_msg
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_pack.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index aa82f727f6..6910c47211 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -5106,7 +5106,8 @@ _pack_composite_msg(composite_msg_t *msg, Buf buffer, uint16_t protocol_version)
/* FIXME: this should handle the
* _global_auth_key() as well. */
tmp_info->auth_cred =
- g_slurm_auth_create(auth_info);
+ g_slurm_auth_create(auth_info,
+ tmp_info->restrict_uid, NULL, 0);
xfree(auth_info);
}
--
2.35.3
From 2e7281f07b769c01ea2d083a85470f08afd58b61 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 17:49:40 +0200
Subject: [PATCH 09/31] Add set_agent_arg_r_uid() call to
_xmit_new_end_time()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_mgr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 781988ec45..afc3e2cd52 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -14501,6 +14501,7 @@ _xmit_new_end_time(struct job_record *job_ptr)
#endif
agent_args->msg_args = job_time_msg_ptr;
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
return;
}
--
2.35.3
From 3ca9b210dee1ae9e315edf0b7015cd2d585e0044 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 11 May 2022 19:41:13 +0200
Subject: [PATCH 10/31] Disable Message Aggregation
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmd/slurmd/slurmd.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 26107ee852..171656bbed 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -176,7 +176,9 @@ static void _decrement_thd_count(void);
static void _destroy_conf(void);
static int _drain_node(char *reason);
static void _fill_registration_msg(slurm_node_registration_status_msg_t *);
+#if 0
static uint64_t _get_int(const char *my_str);
+#endif
static void _handle_connection(int fd, slurm_addr_t *client);
static void _hup_handler(int);
static void _increment_thd_count(void);
@@ -1978,6 +1980,7 @@ static int _set_topo_info(void)
return rc;
}
+#if 0
static uint64_t _get_int(const char *my_str)
{
char *end = NULL;
@@ -1990,23 +1993,33 @@ static uint64_t _get_int(const char *my_str)
return NO_VAL;
return value;
}
+#endif
static uint64_t _parse_msg_aggr_params(int type, char *params)
{
uint64_t value = NO_VAL;
+#if 0
char *sub_str = NULL;
-
+#endif
if (!params)
return NO_VAL;
switch (type) {
case WINDOW_TIME:
+ info("Message aggregation has been disabled, "
+ "please check SLE release notes!");
+#if 0
if ((sub_str = xstrcasestr(params, "WindowTime=")))
value = _get_int(sub_str + 11);
+#endif
break;
case WINDOW_MSGS:
+ info("Message aggregation has been disabled, "
+ "please check SLE release notes!");
+#if 0
if ((sub_str = xstrcasestr(params, "WindowMsgs=")))
value = _get_int(sub_str + 11);
+#endif
break;
default:
fatal("invalid message aggregation parameters: %s", params);
--
2.35.3
From da5f8aa27dc60792809abffe32f735167d7202b6 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Fri, 13 May 2022 09:01:15 +0200
Subject: [PATCH 11/31] Add missing slurm_msg_set_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmd/slurmstepd/mgr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index e2d5fac92b..4af220970b 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -2472,6 +2472,7 @@ _send_complete_batch_script_msg(stepd_step_rec_t *job, int err, int status)
slurm_set_addr_char(&req_msg.address,
conf->port, conf->hostname);
}
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
msg_rc = slurm_send_recv_rc_msg_only_one(&req_msg,
&rc, 0);
}
--
2.35.3
From c6814bf001839d288b3f634ca73d17a53e77c95b Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Fri, 13 May 2022 20:16:05 +0200
Subject: [PATCH 12/31] Fix g_slurm_auth_create in _stepd_connect_legacy
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/stepd_api.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c
index 05c0a810b1..348e29064d 100644
--- a/src/common/stepd_api.c
+++ b/src/common/stepd_api.c
@@ -252,7 +252,8 @@ stepd_connect(const char *directory, const char *nodename,
buffer = init_buf(0);
/* Create an auth credential */
auth_info = slurm_get_auth_info();
- auth_cred = g_slurm_auth_create(auth_info);
+ auth_cred = g_slurm_auth_create(auth_info,
+ slurm_get_slurmd_user_id(), NULL, 0);
xfree(auth_info);
if (auth_cred == NULL) {
error("Creating authentication credential: %s",
--
2.35.3
From c901d16afc747b133b2e6ae28f22e12dead0abe7 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Sat, 14 May 2022 09:23:17 +0200
Subject: [PATCH 13/31] Add missing auth_info arguments
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index db258255f3..94d6ea38c0 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -884,7 +884,7 @@ char *slurm_get_priority_weight_tres(void)
return weights;
}
-static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg,
+static int _check_hash(struct slurm_buf *buffer, header_t *header, slurm_msg_t *msg,
void *cred)
{
char *cred_hash = NULL;
@@ -3178,6 +3178,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
header_t header;
int rc;
void *auth_cred = NULL;
+ char *auth_info;
if (unpack_header(&header, buffer) == SLURM_ERROR) {
rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
@@ -3227,7 +3228,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3241,7 +3242,9 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer)
goto total_return;
}
- msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg->auth_uid_set = true;
/*
@@ -3400,6 +3403,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
ret_data_info_t *ret_data_info = NULL;
List ret_list = NULL;
int orig_timeout = timeout;
+ char *auth_info;
xassert(fd >= 0);
@@ -3505,7 +3509,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3519,7 +3523,9 @@ List slurm_receive_msgs(int fd, int steps, int timeout)
goto total_return;
}
- msg.auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg.auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg.auth_uid_set = true;
/*
* Unpack message body
@@ -3758,6 +3764,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
int rc;
void *auth_cred = NULL;
Buf buffer;
+ char *auth_info;
xassert(fd >= 0);
@@ -3893,7 +3900,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
rc = g_slurm_auth_verify(auth_cred, _global_auth_key());
} else {
- char *auth_info = slurm_get_auth_info();
+ auth_info = slurm_get_auth_info();
rc = g_slurm_auth_verify(auth_cred, auth_info);
xfree(auth_info);
}
@@ -3907,7 +3914,9 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr,
goto total_return;
}
- msg->auth_uid = g_slurm_auth_get_uid(auth_cred);
+ auth_info = slurm_get_auth_info();
+ msg->auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info);
+ xfree(auth_info);
msg->auth_uid_set = true;
/*
--
2.35.3
From fd4a977e5a9c2da9dc7b37b3acde87e5f9489e7b Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:04:00 +0200
Subject: [PATCH 14/31] Fix argument in call to slurm_get_plugin_hash_enable
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_auth.c | 4 ++--
src/common/slurm_auth.h | 2 +-
src/common/slurm_protocol_api.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index 7ce0887ead..a423ad98a4 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -131,12 +131,12 @@ slurm_auth_generic_errstr( int slurm_errno )
}
}
-extern bool slurm_get_plugin_hash_enable(int index)
+extern bool slurm_get_plugin_hash_enable()
{
if (slurm_auth_init(NULL) < 0)
return true;
- return *(ops[index].hash_enable);
+ return *(ops.hash_enable);
}
diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h
index f7ab7ac68e..1be28f3aa9 100644
--- a/src/common/slurm_auth.h
+++ b/src/common/slurm_auth.h
@@ -138,7 +138,7 @@ extern int slurm_auth_fini( void );
* Check if plugin type corresponding to the authentication
* plugin index supports hash.
*/
-extern bool slurm_get_plugin_hash_enable(int index);
+extern bool slurm_get_plugin_hash_enable();
/*
* Static bindings for the global authentication context.
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 94d6ea38c0..bbdf10855a 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -909,7 +909,7 @@ static int _check_hash(struct slurm_buf *buffer, header_t *header, slurm_msg_t *
&msg->msg_type, sizeof(msg->msg_type)))
rc = SLURM_ERROR;
} else if (block_null_hash &&
- slurm_get_plugin_hash_enable(msg->auth_index))
+ slurm_get_plugin_hash_enable())
rc = SLURM_ERROR;
xfree(cred_hash);
--
2.35.3
From b29995356ce0a08e638e7c626dced096854b3449 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:04:48 +0200
Subject: [PATCH 15/31] Fix buffer type
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index bbdf10855a..481b418e67 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3585,7 +3585,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
int rc;
void *auth_cred = NULL;
slurm_msg_t msg;
- buf_t *buffer;
+ Buf buffer;
ret_data_info_t *ret_data_info = NULL;
List ret_list = NULL;
int orig_timeout = timeout;
--
2.35.3
From a3f5ed878c7872f2d64e685750f463007988f952 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:06:49 +0200
Subject: [PATCH 16/31] Fix arguments to forward_init()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 481b418e67..e90979c4ad 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3629,7 +3629,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
* the message.
*/
if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
- forward_init(&header.forward);
+ forward_init(&header.forward, NULL);
rc = errno;
goto total_return;
}
--
2.35.3
From 8db39a8468aef6ffdf6b8babf4f1e61128e1e145 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:07:20 +0200
Subject: [PATCH 17/31] Fix log message function
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index e90979c4ad..f0596490e7 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3608,17 +3608,17 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
steps--;
}
- log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d",
+ debug("%s: orig_timeout was %d we have %d steps and a timeout of %d",
__func__, orig_timeout, steps, timeout);
/* we compare to the orig_timeout here because that is really
* what we are going to wait for each step
*/
if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
- log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
+ debug("%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds",
__func__, (slurm_get_msg_timeout() * 10),
(timeout/1000));
} else if (orig_timeout < 1000) {
- log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
+ debug("%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds",
__func__, timeout, orig_timeout);
}
--
2.35.3
From 268094f7c4c0319bc02b4ac10ab7f12f61774c3e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 08:26:13 +0200
Subject: [PATCH 18/31] Fix arguments in g_slurm_auth_unpack()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index f0596490e7..435495c1ae 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -3675,7 +3675,7 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout)
__func__);
}
- if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) {
+ if (!(auth_cred = g_slurm_auth_unpack(buffer))) {
error("%s: auth_g_unpack: %m", __func__);
free_buf(buffer);
rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
--
2.35.3
From b87fc1b346fa6a75e803b6dfd9337e30f01c43bd Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:12:08 +0200
Subject: [PATCH 19/31] Fix auth_g_get_data()
Remove non-existent wrapper.
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_auth.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c
index a423ad98a4..7b8a3ff61b 100644
--- a/src/common/slurm_auth.c
+++ b/src/common/slurm_auth.c
@@ -224,12 +224,10 @@ int g_slurm_auth_verify(void *cred, char *auth_info)
int auth_g_get_data(void *cred, char **data, uint32_t *len)
{
- cred_wrapper_t *wrap = (cred_wrapper_t *) cred;
-
- if (!wrap || slurm_auth_init(NULL) < 0)
+ if (slurm_auth_init(NULL) < 0)
return SLURM_ERROR;
- return (*(ops[wrap->index].get_data))(cred, data, len);
+ return (*(ops.get_data))(cred, data, len);
}
uid_t g_slurm_auth_get_uid(void *cred, char *auth_info)
--
2.35.3
From f2b152c95cbc771006130f18ebfb4dbfaeb5815e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:18:26 +0200
Subject: [PATCH 20/31] Add support for ESLURM_AUTH_BADARG
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm/slurm_errno.h | 1 +
src/common/slurm_errno.c | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h
index c1d6f935e7..af5baa2441 100644
--- a/slurm/slurm_errno.h
+++ b/slurm/slurm_errno.h
@@ -262,6 +262,7 @@ enum {
ESLURM_AUTH_FOPEN_ERROR,
ESLURM_AUTH_NET_ERROR,
ESLURM_AUTH_UNABLE_TO_SIGN,
+ ESLURM_AUTH_BADARG,
/* accounting errors */
ESLURM_DB_CONNECTION = 7000,
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index 74cfc6b1c5..d4ff6d3f10 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -444,7 +444,8 @@ static slurm_errtab_t slurm_errtab[] = {
"Failed to open authentication public key" },
{ ESLURM_AUTH_NET_ERROR,
"Failed to connect to authentication agent" },
-
+ { ESLURM_AUTH_BADARG,
+ "Bad argument to plugin function" },
/* accounting errors */
{ ESLURM_DB_CONNECTION,
"Unable to connect to database" },
--
2.35.3
From 0f7d6f873ac480e3ff08513a9fe7e1f40ca65809 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 10:41:08 +0200
Subject: [PATCH 21/31] Replace response_init() by slurm_msg_t_init()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_mgr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index afc3e2cd52..e3963a1877 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -13660,7 +13660,7 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
- response_init(&resp_msg, msg);
+ slurm_msg_t_init(&resp_msg);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
--
2.35.3
From f3faf8a9165caa9728ad8f812ea792b07917c829 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 14:33:14 +0200
Subject: [PATCH 22/31] Replace response_init()
Signed-off-by: Egbert Eich <eich@suse.com>
# Conflicts:
# src/slurmctld/proc_req.c
---
src/slurmctld/job_mgr.c | 4 +++
src/slurmctld/proc_req.c | 78 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index e3963a1877..add1f934f7 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -13661,6 +13661,8 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid)
reply:
if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) {
slurm_msg_t_init(&resp_msg);
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
if (resp_array) {
resp_array_msg = _resp_array_xlate(resp_array, job_id);
resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS;
@@ -16235,6 +16237,8 @@ extern int job_requeue2(uid_t uid, requeue_msg_t *req_ptr, slurm_msg_t *msg,
reply:
if (msg) {
slurm_msg_t_init(&resp_msg);
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
resp_msg.protocol_version = msg->protocol_version;
resp_msg.conn = msg->conn;
if (resp_array) {
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 54a28ff9f2..b2e34d624a 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1437,6 +1437,8 @@ static void _slurm_rpc_allocate_pack(slurm_msg_t * msg)
response_msg.conn = msg->conn;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_JOB_PACK_ALLOCATION;
response_msg.data = resp;
@@ -1619,6 +1621,8 @@ send_msg:
response_msg.conn = msg->conn;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_RESOURCE_ALLOCATION;
response_msg.data = alloc_msg;
@@ -1682,6 +1686,8 @@ static void _slurm_rpc_dump_conf(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BUILD_INFO;
@@ -1737,6 +1743,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -1779,6 +1787,8 @@ static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -1823,6 +1833,8 @@ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_INFO;
@@ -1852,6 +1864,8 @@ static void _slurm_rpc_get_shares(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SHARE_INFO;
@@ -1881,6 +1895,8 @@ static void _slurm_rpc_get_priority_factors(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_PRIORITY_FACTORS;
@@ -1919,6 +1935,8 @@ static void _slurm_rpc_end_time(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = SRUN_TIMEOUT;
@@ -1947,6 +1965,8 @@ static void _slurm_rpc_get_fed(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_FED_INFO;
@@ -1995,6 +2015,8 @@ static void _slurm_rpc_dump_front_end(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_FRONT_END_INFO;
@@ -2055,6 +2077,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_NODE_INFO;
@@ -2113,6 +2137,8 @@ static void _slurm_rpc_dump_node_single(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_NODE_INFO;
@@ -2166,6 +2192,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_PARTITION_INFO;
@@ -2676,6 +2704,8 @@ static void _slurm_rpc_dump_batch_script(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BATCH_SCRIPT;
@@ -2801,6 +2831,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg)
slurm_msg_t_init(&resp);
resp.flags = msg->flags;
resp.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp, msg->auth_uid);
resp.address = msg->address;
resp.conn = msg->conn;
resp.msg_type = RESPONSE_JOB_STEP_CREATE;
@@ -2870,6 +2902,8 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_STEP_INFO;
@@ -2977,6 +3011,8 @@ send_reply:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_JOB_WILL_RUN;
@@ -3153,6 +3189,8 @@ static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg)
response_msg.data = job_info_resp_msg;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
@@ -3257,6 +3295,8 @@ static void _slurm_rpc_job_pack_alloc_info(slurm_msg_t * msg)
response_msg.data = resp;
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
FREE_NULL_LIST(resp);
@@ -3500,6 +3540,8 @@ static void _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_JOB_SBCAST_CRED;
response_msg.data = &job_info_resp_msg;
@@ -3866,6 +3908,8 @@ static void _slurm_rpc_step_layout(slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_STEP_LAYOUT;
response_msg.data = step_layout;
@@ -4026,6 +4070,8 @@ send_msg:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB;
response_msg.data = &submit_msg;
@@ -4302,6 +4348,8 @@ send_msg:
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_SUBMIT_BATCH_JOB;
response_msg.data = &submit_msg;
@@ -4839,6 +4887,8 @@ static void _slurm_rpc_resv_create(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
resv_resp_msg.name = resv_desc_ptr->name;
response_msg.msg_type = RESPONSE_CREATE_RESERVATION;
response_msg.data = &resv_resp_msg;
@@ -4969,6 +5019,8 @@ static void _slurm_rpc_resv_show(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_RESERVATION_INFO;
@@ -5025,6 +5077,8 @@ static void _slurm_rpc_layout_show(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_LAYOUT_INFO;
@@ -5117,6 +5171,8 @@ static void _slurm_rpc_job_ready(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
rc_msg.return_code = result;
@@ -5230,6 +5286,8 @@ static void _slurm_rpc_burst_buffer_info(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_BURST_BUFFER_INFO;
@@ -5628,6 +5686,8 @@ inline static void _slurm_rpc_trigger_get(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_TRIGGER_GET;
@@ -5715,6 +5775,8 @@ inline static void _slurm_rpc_get_topo(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_TOPO_INFO;
@@ -5747,6 +5809,8 @@ inline static void _slurm_rpc_get_powercap(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_POWERCAP_INFO;
@@ -6227,6 +6291,8 @@ inline static void _slurm_rpc_dump_spank(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONCE_SPANK_ENVIRONMENT;
@@ -6313,6 +6379,8 @@ inline static void _slurm_rpc_dump_stats(slurm_msg_t * msg)
slurm_msg_t_init(&response_msg);
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_STATS_INFO;
@@ -6376,6 +6444,8 @@ _slurm_rpc_dump_licenses(slurm_msg_t * msg)
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_LICENSE_INFO;
@@ -6591,6 +6661,8 @@ static void _slurm_rpc_composite_msg(slurm_msg_t *msg)
slurm_msg_t_init(&resp_msg);
resp_msg.flags = msg->flags;
resp_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
memcpy(&resp_msg.address, &comp_msg->sender,
sizeof(slurm_addr_t));
resp_msg.msg_type = RESPONSE_MESSAGE_COMPOSITE;
@@ -6676,6 +6748,8 @@ static void _slurm_rpc_comp_msg_list(composite_msg_t * comp_msg,
resp_msg->flags = next_msg->flags;
resp_msg->protocol_version =
next_msg->protocol_version;
+ if (next_msg->auth_uid_set)
+ slurm_msg_set_r_uid(resp_msg, next_msg->auth_uid);
resp_msg->msg_type = RESPONSE_MESSAGE_COMPOSITE;
/* You can't just set the
* resp_msg->address here, it won't
@@ -6751,6 +6825,8 @@ static void _slurm_rpc_assoc_mgr_info(slurm_msg_t * msg)
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_ASSOC_MGR_INFO;
@@ -7019,6 +7095,8 @@ static void _proc_multi_msg(uint32_t rpc_uid, slurm_msg_t *msg)
slurm_msg_t_init(&response_msg);
response_msg.flags = msg->flags;
response_msg.protocol_version = msg->protocol_version;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.address = msg->address;
response_msg.conn = msg->conn;
response_msg.msg_type = RESPONSE_CTLD_MULT_MSG;
--
2.35.3
From d079c59d888083d6cf11681b1caa713fd6dea863 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 16 May 2022 14:33:47 +0200
Subject: [PATCH 23/31] Add missing auth_uid init setting
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/proc_req.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index b2e34d624a..3d308f9055 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -6857,6 +6857,9 @@ static int _process_persist_conn(void *arg,
slurm_msg_t_init(&msg);
msg.auth_cred = persist_conn->auth_cred;
+ msg.auth_uid = *uid;
+ msg.auth_uid_set = true;
+
msg.conn = persist_conn;
msg.conn_fd = persist_conn->fd;
--
2.35.3
From fb75cf0c8ba09d858320e9c9d0acacccf61b323f Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 15:09:34 +0200
Subject: [PATCH 24/31] Add missing set_agent_arg_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/plugins/checkpoint/blcr/checkpoint_blcr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/plugins/checkpoint/blcr/checkpoint_blcr.c b/src/plugins/checkpoint/blcr/checkpoint_blcr.c
index b8ec15a566..23d3a4cd8b 100644
--- a/src/plugins/checkpoint/blcr/checkpoint_blcr.c
+++ b/src/plugins/checkpoint/blcr/checkpoint_blcr.c
@@ -583,6 +583,7 @@ static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
}
hostlist_iterator_destroy(hi);
+ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
agent_queue_request(agent_args);
}
--
2.35.3
From 3a67696a66dae9ed78b6db0f76260d3560971a47 Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 20:35:02 +0200
Subject: [PATCH 25/31] Add slurm_msg_set_r_uid() to
_persist_fed_job_lock_bool() and _agent_thread()
This seems to be missing from upstream as well.
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/fed_mgr.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c
index 8b464847b7..7d8043dbd8 100644
--- a/src/slurmctld/fed_mgr.c
+++ b/src/slurmctld/fed_mgr.c
@@ -1038,6 +1038,7 @@ static int _persist_fed_job_lock_bool(slurmdb_cluster_rec_t *conn,
req_msg.protocol_version = conn->rpc_version;
req_msg.data = &sib_msg;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if (_send_recv_msg(conn, &req_msg, &resp_msg, false)) {
rc = SLURM_PROTOCOL_ERROR;
@@ -2280,6 +2281,7 @@ static void *_agent_thread(void *arg)
slurm_msg_t_init(&req_msg);
req_msg.msg_type = REQUEST_CTLD_MULT_MSG;
req_msg.data = &ctld_req_msg;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
rc = _send_recv_msg(cluster, &req_msg, &resp_msg,
false);
--
2.35.3
From eca4da6d1c99bea7f277a1c8e8b071575d3f9cec Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Wed, 18 May 2022 20:52:16 +0200
Subject: [PATCH 26/31] Fix slurm_msg_set_r_uid() settings for checkpointing
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/checkpoint.c | 1 +
src/slurmctld/job_mgr.c | 2 ++
src/slurmctld/step_mgr.c | 3 +++
3 files changed, 6 insertions(+)
diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c
index 2f4ce092ab..4df2235314 100644
--- a/src/common/checkpoint.c
+++ b/src/common/checkpoint.c
@@ -359,6 +359,7 @@ extern int checkpoint_tasks (uint32_t job_id, uint32_t step_id,
ckpt_req.image_dir = image_dir;
req_msg.msg_type = REQUEST_CHECKPOINT_TASKS;
req_msg.data = &ckpt_req;
+ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY);
if ((ret_list = slurm_send_recv_msgs(nodelist, &req_msg, (wait*1000),
false))) {
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index add1f934f7..995e49dba2 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -16987,6 +16987,7 @@ extern int job_checkpoint(checkpoint_msg_t *ckpt_ptr, uid_t uid,
if (conn_fd < 0) /* periodic checkpoint */
return rc;
+ slurm_msg_set_r_uid(&resp_msg, uid);
if ((rc == SLURM_SUCCESS) &&
((ckpt_ptr->op == CHECK_ABLE) || (ckpt_ptr->op == CHECK_ERROR))) {
resp_msg.msg_type = RESPONSE_CHECKPOINT;
@@ -17397,6 +17398,7 @@ extern int job_restart(checkpoint_msg_t *ckpt_ptr, uid_t uid, int conn_fd,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 947511d7d6..5ae6b0ce04 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -3307,6 +3307,7 @@ extern int job_step_checkpoint(checkpoint_msg_t *ckpt_ptr,
}
reply:
+ slurm_msg_set_r_uid(&resp_msg, uid);
if ((rc == SLURM_SUCCESS) &&
((ckpt_ptr->op == CHECK_ABLE) || (ckpt_ptr->op == CHECK_ERROR))) {
resp_msg.msg_type = RESPONSE_CHECKPOINT;
@@ -3376,6 +3377,7 @@ extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
}
@@ -3435,6 +3437,7 @@ extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr,
rc_msg.return_code = rc;
resp_msg.msg_type = RESPONSE_SLURM_RC;
resp_msg.data = &rc_msg;
+ slurm_msg_set_r_uid(&resp_msg, uid);
(void) slurm_send_node_msg(conn_fd, &resp_msg);
return rc;
}
--
2.35.3
From 5382605a3968a109b7d35619b968b9726c2d29ed Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Thu, 19 May 2022 09:48:08 +0200
Subject: [PATCH 27/31] Add missing calls to slurm_msg_set_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 435495c1ae..48f0e10fba 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4397,7 +4397,14 @@ int slurm_send_rc_err_msg(slurm_msg_t *msg, int rc, char *err_msg)
resp_msg.forward_struct = msg->forward_struct;
resp_msg.ret_list = msg->ret_list;
resp_msg.orig_addr = msg->orig_addr;
-
+ /* like _resp_msg_setup() */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_UID_ANY);
/* send message */
return slurm_send_node_msg(msg->conn_fd, &resp_msg);
}
@@ -4434,6 +4441,14 @@ int slurm_send_reroute_msg(slurm_msg_t *msg, slurmdb_cluster_rec_t *cluster_rec)
resp_msg.ret_list = msg->ret_list;
resp_msg.orig_addr = msg->orig_addr;
+ /* like _resp_msg_setup() */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
+ slurm_msg_set_r_uid(&resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(&resp_msg, SLURM_AUTH_UID_ANY);
/* send message */
return slurm_send_node_msg(msg->conn_fd, &resp_msg);
}
--
2.35.3
From b076857cc6f697d0cb05991c356ce9e509dac83e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Thu, 19 May 2022 22:13:56 +0200
Subject: [PATCH 28/31] Add slurm_msg_set_r_uid() for replace_batch_job() which
has been deleted since
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmctld/job_scheduler.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 91d77644fd..ac6a50a76e 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -969,6 +969,7 @@ send_reply:
resp_msg->address = msg->address;
resp_msg->msg_type = REQUEST_BATCH_JOB_LAUNCH;
resp_msg->data = launch_msg;
+ slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
list_append(msg->ret_list, resp_msg);
} else {
slurm_msg_t response_msg;
@@ -978,6 +979,7 @@ send_reply:
response_msg.address = msg->address;
response_msg.msg_type = REQUEST_BATCH_JOB_LAUNCH;
response_msg.data = launch_msg;
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
slurm_send_node_msg(msg->conn_fd, &response_msg);
slurm_free_job_launch_msg(launch_msg);
}
--
2.35.3
From 191b41486a32cb8218ae2fb56aa589b485b6566e Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 23 May 2022 09:31:38 +0200
Subject: [PATCH 29/31] Add type bool to proc_req.c
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/slurmdbd/proc_req.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c
index aca876a503..8256eb9e39 100644
--- a/src/slurmdbd/proc_req.c
+++ b/src/slurmdbd/proc_req.c
@@ -44,6 +44,7 @@
#include <sys/prctl.h>
#endif
+#include <stdbool.h>
#include "src/common/slurm_auth.h"
#include "src/common/gres.h"
#include "src/common/macros.h"
--
2.35.3
From 73c9a26c60fc1cd07b234858165881286969238c Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Mon, 23 May 2022 09:33:49 +0200
Subject: [PATCH 30/31] Add config CommunicationParameters for
'block_null_hash' option
Signed-off-by: Egbert Eich <eich@suse.com>
---
slurm/slurm.h.in | 1 +
src/common/read_config.c | 6 ++++++
src/common/slurm_protocol_api.c | 18 ++++++++++++++++++
src/common/slurm_protocol_api.h | 6 ++++++
4 files changed, 31 insertions(+)
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index dc75c9308f..b8f11d3249 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -2971,6 +2971,7 @@ typedef struct slurm_ctl_conf {
char *version; /* version of slurmctld */
uint16_t vsize_factor; /* virtual memory limit size factor */
uint16_t wait_time; /* default job --wait time */
+ char *comm_params; /* Communication parameters */
} slurm_ctl_conf_t;
typedef struct slurmd_status_msg {
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 5913df1cdc..225937520b 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -194,6 +194,7 @@ s_p_options_t slurm_conf_options[] = {
{"ChosLoc", S_P_STRING},
{"CoreSpecPlugin", S_P_STRING},
{"ClusterName", S_P_STRING},
+ {"CommunicationParameters", S_P_STRING},
{"CompleteWait", S_P_UINT16},
{"ControlAddr", S_P_STRING},
{"ControlMachine", S_P_STRING},
@@ -2337,6 +2338,7 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash)
xfree (ctl_conf_ptr->checkpoint_type);
xfree (ctl_conf_ptr->chos_loc);
xfree (ctl_conf_ptr->cluster_name);
+ xfree (ctl_conf_ptr->comm_params);
xfree (ctl_conf_ptr->control_addr);
xfree (ctl_conf_ptr->control_machine);
xfree (ctl_conf_ptr->core_spec_plugin);
@@ -2460,6 +2462,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
xfree (ctl_conf_ptr->checkpoint_type);
xfree (ctl_conf_ptr->chos_loc);
xfree (ctl_conf_ptr->cluster_name);
+ xfree (ctl_conf_ptr->comm_params);
ctl_conf_ptr->complete_wait = NO_VAL16;
xfree (ctl_conf_ptr->control_addr);
xfree (ctl_conf_ptr->control_machine);
@@ -3096,6 +3099,9 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
if (s_p_get_uint16(&uint16_tmp, "CacheGroups", hashtbl))
debug("Ignoring obsolete CacheGroups option.");
+ (void) s_p_get_string(&conf->comm_params, "CommunicationParameters",
+ hashtbl);
+
if (!s_p_get_string(&conf->core_spec_plugin, "CoreSpecPlugin",
hashtbl)) {
conf->core_spec_plugin =
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 48f0e10fba..e9f05c0319 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -1179,6 +1179,24 @@ char *slurm_get_cluster_name(void)
return name;
}
+/* slurm_get_comm_parameters
+ * returns a copy of comm_params from the slurmctld_conf object
+ * RET char * - comm parameters, MUST be xfreed by caller
+ */
+extern char *slurm_get_comm_parameters(void)
+{
+ char *comm_params = NULL;
+ slurm_ctl_conf_t *conf;
+
+ if (slurmdbd_conf) { /* nothing is read in this branch; result stays NULL */
+ } else {
+ conf = slurm_conf_lock();
+ comm_params = xstrdup(conf->comm_params); /* copy made while conf is locked */
+ slurm_conf_unlock();
+ }
+ return comm_params;
+}
+
/* slurm_get_crypto_type
* returns the crypto_type from slurmctld_conf object
* RET char * - crypto type, MUST be xfreed by caller
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 8da140c565..267cfe627a 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -468,6 +468,12 @@ extern char *slurm_get_checkpoint_dir(void);
*/
char *slurm_get_cluster_name(void);
+/* slurm_get_comm_parameters
+ * returns the value of comm_params in slurmctld_conf object
+ * RET char * - comm parameters, MUST be xfreed by caller
+ */
+extern char *slurm_get_comm_parameters(void);
+
/* slurm_get_crypto_type
* returns the crypto_type from slurmctld_conf object
* RET char * - crypto type, MUST be xfreed by caller
--
2.35.3
From 0a33589bc684ae9db9f2b1a05221266bd2886e7f Mon Sep 17 00:00:00 2001
From: Egbert Eich <eich@suse.com>
Date: Tue, 24 May 2022 08:27:27 +0200
Subject: [PATCH 31/31] Add missing calls to slurm_msg_set_r_uid()
Signed-off-by: Egbert Eich <eich@suse.com>
---
src/common/slurm_protocol_api.c | 8 ++++++++
src/slurmctld/backup.c | 2 ++
src/slurmctld/controller.c | 2 ++
src/slurmctld/proc_req.c | 2 ++
4 files changed, 14 insertions(+)
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index e9f05c0319..938f7bb4c6 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -4338,6 +4338,14 @@ static void _rc_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg,
resp_msg->forward_struct = msg->forward_struct;
resp_msg->ret_list = msg->ret_list;
resp_msg->orig_addr = msg->orig_addr;
+ /* like _resp_msg_setup() */
+ if (!msg->auth_uid_set)
+ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY);
+ else if ((msg->auth_uid != slurm_get_slurm_user_id()) &&
+ (msg->auth_uid != slurm_get_slurmd_user_id()))
+ slurm_msg_set_r_uid(resp_msg, msg->auth_uid);
+ else
+ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY);
}
diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c
index 9b98594c9c..d9b296a180 100644
--- a/src/slurmctld/backup.c
+++ b/src/slurmctld/backup.c
@@ -445,6 +445,7 @@ static int _ping_controller(void)
unlock_slurmctld(config_read_lock);
req.msg_type = REQUEST_PING;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) {
error("_ping_controller/slurm_send_node_msg error: %m");
@@ -500,6 +501,7 @@ static int _shutdown_primary_controller(int wait_time)
/* send request message */
req.msg_type = REQUEST_CONTROL;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_rc_msg_only_one(&req, &rc,
(CONTROL_TIMEOUT * 1000)) < 0) {
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 3fbcb29f0e..9ade6d7638 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -2567,6 +2567,7 @@ static int _shutdown_backup_controller(int wait_time)
/* send request message */
req.msg_type = REQUEST_CONTROL;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_rc_msg_only_one(&req, &rc,
(CONTROL_TIMEOUT * 1000)) < 0) {
@@ -3103,6 +3104,7 @@ static int _ping_backup_controller(void)
unlock_slurmctld(config_read_lock);
req.msg_type = REQUEST_PING;
+ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY);
if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) {
debug2("_ping_backup_controller/slurm_send_node_msg error: %m");
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 3d308f9055..2645a9b8ca 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -5245,6 +5245,8 @@ static void _slurm_rpc_block_info(slurm_msg_t * msg)
response_msg.protocol_version = msg->protocol_version;
response_msg.address = msg->address;
response_msg.conn = msg->conn;
+ if (msg->auth_uid_set)
+ slurm_msg_set_r_uid(&response_msg, msg->auth_uid);
response_msg.msg_type = RESPONSE_BLOCK_INFO;
response_msg.data = get_buf_data(buffer);
response_msg.data_size = get_buf_offset(buffer);
--
2.35.3