mirror of
https://github.com/systemd/systemd
synced 2025-10-06 00:13:24 +02:00
Merge remote-tracking branch 'systemd-security/coredump-d'
This commit is contained in:
@@ -229,6 +229,17 @@ COREDUMP_FILENAME=/var/lib/systemd/coredump/core.Web….552351.….zst
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>COREDUMP_BY_PIDFD=</varname></term>
|
||||
<listitem><para>If the crashed process was analyzed using a PIDFD provided by the kernel (requires
|
||||
kernel v6.16 or newer), then this field will be present and set to <literal>1</literal>. If this field is
|
||||
not set, then the crashed process was analyzed via a PID, which is known to be subject to race
|
||||
conditions.</para>
|
||||
|
||||
<xi:include href="version-info.xml" xpointer="v258"/>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>COREDUMP_TIMESTAMP=</varname></term>
|
||||
<listitem><para>The time of the crash as reported by the kernel (in μs since the epoch).</para>
|
||||
@@ -329,6 +340,18 @@ COREDUMP_FILENAME=/var/lib/systemd/coredump/core.Web….552351.….zst
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>COREDUMP_DUMPABLE=</varname></term>
|
||||
|
||||
<listitem><para>The <constant>PR_GET_DUMPABLE</constant> field as reported by the kernel, see
|
||||
<citerefentry
|
||||
project='man-pages'><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>.
|
||||
</para>
|
||||
|
||||
<xi:include href="version-info.xml" xpointer="v258"/>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>COREDUMP_OPEN_FDS=</varname></term>
|
||||
|
||||
|
@@ -3,7 +3,6 @@
|
||||
#include <elf.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/xattr.h>
|
||||
#include <unistd.h>
|
||||
@@ -87,7 +86,7 @@ assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
|
||||
|
||||
#define MOUNT_TREE_ROOT "/run/systemd/mount-rootfs"
|
||||
|
||||
enum {
|
||||
typedef enum {
|
||||
/* We use these as array indexes for our process metadata cache.
|
||||
*
|
||||
* The first indices of the cache store the same metadata as the ones passed by the kernel via
|
||||
@@ -103,9 +102,11 @@ enum {
|
||||
_META_ARGV_REQUIRED,
|
||||
/* The fields below were added to kernel/core_pattern at later points, so they might be missing. */
|
||||
META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */
|
||||
_META_ARGV_MAX,
|
||||
META_ARGV_DUMPABLE, /* %d: as set by the kernel */
|
||||
META_ARGV_PIDFD, /* %F: pidfd of the process, since v6.16 */
|
||||
/* If new fields are added, they should be added here, to maintain compatibility
|
||||
* with callers which don't know about the new fields. */
|
||||
_META_ARGV_MAX,
|
||||
|
||||
/* The following indexes are cached for a couple of special fields we use (and
|
||||
* thereby need to be retrieved quickly) for naming coredump files, and attaching
|
||||
@@ -113,16 +114,15 @@ enum {
|
||||
* environment. */
|
||||
|
||||
META_COMM = _META_ARGV_MAX,
|
||||
_META_MANDATORY_MAX,
|
||||
|
||||
/* The rest are similar to the previous ones except that we won't fail if one of
|
||||
* them is missing in a message sent over the socket. */
|
||||
|
||||
META_EXE = _META_MANDATORY_MAX,
|
||||
META_EXE,
|
||||
META_UNIT,
|
||||
META_PROC_AUXV,
|
||||
_META_MAX
|
||||
};
|
||||
} meta_argv_t;
|
||||
|
||||
static const char * const meta_field_names[_META_MAX] = {
|
||||
[META_ARGV_PID] = "COREDUMP_PID=",
|
||||
@@ -132,6 +132,8 @@ static const char * const meta_field_names[_META_MAX] = {
|
||||
[META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
|
||||
[META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
|
||||
[META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
|
||||
[META_ARGV_DUMPABLE] = "COREDUMP_DUMPABLE=",
|
||||
[META_ARGV_PIDFD] = "COREDUMP_BY_PIDFD=",
|
||||
[META_COMM] = "COREDUMP_COMM=",
|
||||
[META_EXE] = "COREDUMP_EXE=",
|
||||
[META_UNIT] = "COREDUMP_UNIT=",
|
||||
@@ -142,10 +144,12 @@ typedef struct Context {
|
||||
PidRef pidref;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
unsigned dumpable;
|
||||
int signo;
|
||||
uint64_t rlimit;
|
||||
bool is_pid1;
|
||||
bool is_journald;
|
||||
bool got_pidfd;
|
||||
int mount_tree_fd;
|
||||
|
||||
/* These point into external memory, are not owned by this object */
|
||||
@@ -438,14 +442,16 @@ static int grant_user_access(int core_fd, const Context *context) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* We allow access if we got all the data and at_secure is not set and
|
||||
* the uid/gid matches euid/egid. */
|
||||
/* We allow access if %d/dumpable on the command line was exactly 1, we got all the data,
|
||||
* at_secure is not set, and the uid/gid match euid/egid. */
|
||||
bool ret =
|
||||
context->dumpable == SUID_DUMP_USER &&
|
||||
at_secure == 0 &&
|
||||
uid != UID_INVALID && euid != UID_INVALID && uid == euid &&
|
||||
gid != GID_INVALID && egid != GID_INVALID && gid == egid;
|
||||
log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)",
|
||||
log_debug("Will %s access (dumpable=%u uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)",
|
||||
ret ? "permit" : "restrict",
|
||||
context->dumpable,
|
||||
uid, euid, gid, egid, yes_no(at_secure));
|
||||
return ret;
|
||||
}
|
||||
@@ -1027,7 +1033,6 @@ static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) {
|
||||
|
||||
assert(context);
|
||||
assert(iovw);
|
||||
assert(iovw->count >= _META_ARGV_MAX);
|
||||
|
||||
/* Converts the data in the iovec array iovw into separate fields. Fills in context->meta[] (for
|
||||
* which no memory is allocated, it just contains direct pointers into the iovec array memory). */
|
||||
@@ -1088,6 +1093,16 @@ static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) {
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Failed to parse resource limit \"%s\", ignoring: %m", context->meta[META_ARGV_RLIMIT]);
|
||||
|
||||
/* The value is set to the contents of /proc/sys/fs/suid_dumpable, which we set to SUID_DUMP_SAFE (2),
|
||||
* if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */
|
||||
if (context->meta[META_ARGV_DUMPABLE]) {
|
||||
r = safe_atou(context->meta[META_ARGV_DUMPABLE], &context->dumpable);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse dumpable field \"%s\": %m", context->meta[META_ARGV_DUMPABLE]);
|
||||
if (context->dumpable > SUID_DUMP_SAFE)
|
||||
log_notice("Got unexpected %%d/dumpable value %u.", context->dumpable);
|
||||
}
|
||||
|
||||
unit = context->meta[META_UNIT];
|
||||
context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE);
|
||||
context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE);
|
||||
@@ -1228,10 +1243,24 @@ static int process_socket(int fd) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Make sure we received at least all fields we need. */
|
||||
for (int i = 0; i < _META_MANDATORY_MAX; i++)
|
||||
/* Make sure we received all the expected fields. We support being called by an *older*
|
||||
* systemd-coredump from the outside, so we require only the basic set of fields that
|
||||
* was being sent when the support for sending to containers over a socket was added
|
||||
* in a108c43e36d3ceb6e34efe37c014fc2cda856000. */
|
||||
meta_argv_t i;
|
||||
FOREACH_ARGUMENT(i,
|
||||
META_ARGV_PID,
|
||||
META_ARGV_UID,
|
||||
META_ARGV_GID,
|
||||
META_ARGV_SIGNAL,
|
||||
META_ARGV_TIMESTAMP,
|
||||
META_ARGV_RLIMIT,
|
||||
META_ARGV_HOSTNAME,
|
||||
META_COMM)
|
||||
if (!context.meta[i])
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "A mandatory argument (%i) has not been sent, aborting.", i);
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
||||
"Mandatory argument %s not received on socket, aborting.",
|
||||
meta_field_names[i]);
|
||||
|
||||
return submit_coredump(&context, &iovw, input_fd);
|
||||
}
|
||||
@@ -1318,7 +1347,8 @@ static int gather_pid_metadata_from_argv(
|
||||
Context *context,
|
||||
int argc, char **argv) {
|
||||
|
||||
int r;
|
||||
_cleanup_(pidref_done) PidRef local_pidref = PIDREF_NULL;
|
||||
int r, kernel_fd = -EBADF;
|
||||
|
||||
assert(iovw);
|
||||
assert(context);
|
||||
@@ -1350,6 +1380,49 @@ static int gather_pid_metadata_from_argv(
|
||||
t = buf;
|
||||
}
|
||||
|
||||
if (i == META_ARGV_PID) {
|
||||
/* Store this so that we can check whether the core will be forwarded to a container
|
||||
* even when the kernel doesn't provide a pidfd. Can be dropped once baseline is
|
||||
* >= v6.16. */
|
||||
r = pidref_set_pidstr(&local_pidref, t);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to initialize pidref from pid %s: %m", t);
|
||||
}
|
||||
|
||||
if (i == META_ARGV_PIDFD) {
|
||||
/* If the current kernel doesn't support the %F specifier (which resolves to a
|
||||
* pidfd), but we included it in the core_pattern expression, we'll receive an empty
|
||||
* string here. Deal with that gracefully. */
|
||||
if (isempty(t))
|
||||
continue;
|
||||
|
||||
assert(!pidref_is_set(&context->pidref));
|
||||
assert(kernel_fd < 0);
|
||||
|
||||
kernel_fd = parse_fd(t);
|
||||
if (kernel_fd < 0)
|
||||
return log_error_errno(kernel_fd, "Failed to parse pidfd \"%s\": %m", t);
|
||||
|
||||
r = pidref_set_pidfd(&context->pidref, kernel_fd);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to initialize pidref from pidfd %d: %m", kernel_fd);
|
||||
|
||||
context->got_pidfd = 1;
|
||||
|
||||
/* If there are containers involved with different versions of the code they might
|
||||
* not be using pidfds, so it would be wrong to set the metadata, skip it. */
|
||||
r = pidref_in_same_namespace(/* pid1 = */ NULL, &context->pidref, NAMESPACE_PID);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m");
|
||||
if (r <= 0)
|
||||
continue;
|
||||
|
||||
/* We don't print the fd number in the journal as it's meaningless, but we still
|
||||
* record that the parsing was done with a kernel-provided fd as it means it's safe
|
||||
* from races, which is valuable information to provide in the journal record. */
|
||||
t = "1";
|
||||
}
|
||||
|
||||
r = iovw_put_string_field(iovw, meta_field_names[i], t);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@@ -1357,7 +1430,19 @@ static int gather_pid_metadata_from_argv(
|
||||
|
||||
/* Cache some of the process metadata we collected so far and that we'll need to
|
||||
* access soon. */
|
||||
return context_parse_iovw(context, iovw);
|
||||
r = context_parse_iovw(context, iovw);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* If the kernel didn't give us a PIDFD, then use the one derived from the
|
||||
* PID immediately, given we have it. */
|
||||
if (!pidref_is_set(&context->pidref))
|
||||
context->pidref = TAKE_PIDREF(local_pidref);
|
||||
|
||||
/* Close the kernel-provided FD as the last thing after everything else succeeded. */
|
||||
kernel_fd = safe_close(kernel_fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *context) {
|
||||
@@ -1536,13 +1621,21 @@ static int receive_ucred(int transport_fd, struct ucred *ret_ucred) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int can_forward_coredump(const PidRef *pid) {
|
||||
static int can_forward_coredump(Context *context, const PidRef *pid) {
|
||||
_cleanup_free_ char *cgroup = NULL, *path = NULL, *unit = NULL;
|
||||
int r;
|
||||
|
||||
assert(context);
|
||||
assert(pidref_is_set(pid));
|
||||
assert(!pidref_is_remote(pid));
|
||||
|
||||
/* We need to avoid a situation where the attacker crashes a SUID process or a root daemon and
|
||||
* quickly replaces it with a namespaced process and we forward the coredump to the attacker, into
|
||||
* the namespace. With %F/pidfd we can reliably check the namespace of the original process, hence we
|
||||
* can allow forwarding. */
|
||||
if (!context->got_pidfd && context->dumpable != SUID_DUMP_USER)
|
||||
return false;
|
||||
|
||||
r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@@ -1587,7 +1680,7 @@ static int forward_coredump_to_container(Context *context) {
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to get namespace leader: %m");
|
||||
|
||||
r = can_forward_coredump(&leader_pid);
|
||||
r = can_forward_coredump(context, &leader_pid);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to check if coredump can be forwarded: %m");
|
||||
if (r == 0)
|
||||
@@ -1930,7 +2023,7 @@ static int run(int argc, char *argv[]) {
|
||||
log_set_target_and_open(LOG_TARGET_KMSG);
|
||||
|
||||
/* Make sure we never enter a loop */
|
||||
(void) prctl(PR_SET_DUMPABLE, 0);
|
||||
(void) set_dumpable(SUID_DUMP_DISABLE);
|
||||
|
||||
/* Ignore all parse errors */
|
||||
(void) parse_config();
|
||||
|
@@ -1,9 +1,11 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include <elf.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "coredump-util.h"
|
||||
#include "errno-util.h"
|
||||
#include "extract-word.h"
|
||||
#include "fileio.h"
|
||||
#include "log.h"
|
||||
@@ -14,6 +16,11 @@
|
||||
#include "unaligned.h"
|
||||
#include "virt.h"
|
||||
|
||||
int set_dumpable(SuidDumpMode mode) {
        /* prctl() is variadic and wants its arguments as longs, hence widen the enum value
         * explicitly before passing it on. */
        long dumpable = (long) mode;

        /* Passes the prctl() result through RET_NERRNO() and returns that to the caller. */
        return RET_NERRNO(prctl(PR_SET_DUMPABLE, dumpable));
}
|
||||
|
||||
static const char *const coredump_filter_table[_COREDUMP_FILTER_MAX] = {
|
||||
[COREDUMP_FILTER_PRIVATE_ANONYMOUS] = "private-anonymous",
|
||||
[COREDUMP_FILTER_SHARED_ANONYMOUS] = "shared-anonymous",
|
||||
|
@@ -25,6 +25,15 @@ typedef enum CoredumpFilter {
|
||||
/* The kernel doesn't like UINT64_MAX and returns ERANGE, use UINT32_MAX to support future new flags */
|
||||
#define COREDUMP_FILTER_MASK_ALL UINT32_MAX
|
||||
|
||||
/* Dumpable modes, as taken by set_dumpable(). The numeric values are fixed explicitly; see the
 * reference noted on each entry. */
typedef enum SuidDumpMode {
        /* PR_SET_DUMPABLE(2const) */
        SUID_DUMP_DISABLE = 0,

        /* PR_SET_DUMPABLE(2const) */
        SUID_DUMP_USER    = 1,

        /* https://www.kernel.org/doc/html/latest/admin-guide/sysctl/fs.html#suid-dumpable */
        SUID_DUMP_SAFE    = 2,

        /* One past the last valid mode */
        _SUID_DUMP_MODE_MAX,
} SuidDumpMode;
|
||||
|
||||
int set_dumpable(SuidDumpMode mode);
|
||||
|
||||
const char* coredump_filter_to_string(CoredumpFilter i) _const_;
|
||||
CoredumpFilter coredump_filter_from_string(const char *s) _pure_;
|
||||
int coredump_filter_mask_from_string(const char *s, uint64_t *ret);
|
||||
|
@@ -6,12 +6,12 @@
|
||||
#include <elfutils/libdwfl.h>
|
||||
#include <libelf.h>
|
||||
#endif
|
||||
#include <sys/prctl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sd-json.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "coredump-util.h"
|
||||
#include "dlfcn-util.h"
|
||||
#include "elf-util.h"
|
||||
#include "errno-util.h"
|
||||
@@ -829,7 +829,7 @@ int parse_elf_object(int fd, const char *executable, const char *root, bool fork
|
||||
if (r == 0) {
|
||||
/* We want to avoid loops, given this can be called from systemd-coredump */
|
||||
if (fork_disable_dump) {
|
||||
r = RET_NERRNO(prctl(PR_SET_DUMPABLE, 0));
|
||||
r = set_dumpable(SUID_DUMP_DISABLE);
|
||||
if (r < 0)
|
||||
report_errno_and_exit(error_pipe[1], r);
|
||||
}
|
||||
|
@@ -17,6 +17,7 @@
|
||||
#include "bus-wait-for-jobs.h"
|
||||
#include "cgroup-setup.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "coredump-util.h"
|
||||
#include "env-file.h"
|
||||
#include "env-util.h"
|
||||
#include "errno-util.h"
|
||||
@@ -436,7 +437,8 @@ int assert_signal_internal(void) {
|
||||
|
||||
if (r == 0) {
|
||||
/* Speed things up by never even attempting to generate a coredump */
|
||||
(void) prctl(PR_SET_DUMPABLE, 0);
|
||||
(void) set_dumpable(SUID_DUMP_DISABLE);
|
||||
|
||||
/* But still set an rlimit just in case */
|
||||
(void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(0));
|
||||
return 0;
|
||||
|
@@ -13,7 +13,7 @@
|
||||
# the core dump.
|
||||
#
|
||||
# See systemd-coredump(8) and core(5).
|
||||
kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h
|
||||
kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d %F
|
||||
|
||||
# Allow 16 coredumps to be dispatched in parallel by the kernel.
|
||||
# We collect metadata from /proc/%P/, and thus need to make sure the crashed
|
||||
|
@@ -198,12 +198,17 @@ journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE
|
||||
/usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509900 12345
|
||||
journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" |
|
||||
/usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509901 12345 mymachine
|
||||
journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" |
|
||||
/usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509902 12345 youmachine 1
|
||||
# Wait a bit for the coredumps to get processed
|
||||
timeout 30 bash -c "while [[ \$(coredumpctl list -q --no-legend $$ | wc -l) -lt 2 ]]; do sleep 1; done"
|
||||
coredumpctl info $$
|
||||
coredumpctl info COREDUMP_TIMESTAMP=1679509900000000
|
||||
coredumpctl info COREDUMP_TIMESTAMP=1679509901000000
|
||||
coredumpctl info COREDUMP_HOSTNAME="mymachine"
|
||||
coredumpctl info COREDUMP_TIMESTAMP=1679509902000000
|
||||
coredumpctl info COREDUMP_HOSTNAME="youmachine"
|
||||
coredumpctl info COREDUMP_DUMPABLE="1"
|
||||
|
||||
# This used to cause a stack overflow
|
||||
systemd-run -t --property CoredumpFilter=all ls /tmp
|
||||
|
Reference in New Issue
Block a user