X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=lttng-statedump-impl.c;h=5511c7e82cf4da0a09056de5637ee4be9fd966a9;hb=4733b9ab0c355a3563af31d879356702710612b7;hp=37a4542e253ed35157746a84e2c776e80a278443;hpb=1c124020c743254923d8e76ab5dcd1f69709982e;p=lttng-modules.git diff --git a/lttng-statedump-impl.c b/lttng-statedump-impl.c index 37a4542e..5511c7e8 100644 --- a/lttng-statedump-impl.c +++ b/lttng-statedump-impl.c @@ -1,4 +1,5 @@ -/* +/* SPDX-License-Identifier: (GPL-2.0 or LGPL-2.1) + * * lttng-statedump.c * * Linux Trace Toolkit Next Generation Kernel State Dump @@ -6,20 +7,6 @@ * Copyright 2005 Jean-Hugues Deschenes * Copyright 2006-2012 Mathieu Desnoyers * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; only - * version 2.1 of the License. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * * Changes: * Eric Clement: Add listing of network IP interface * 2006, 2007 Mathieu Desnoyers Fix kernel threads @@ -41,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -50,13 +36,13 @@ #include #include #include -#include #include +#include #include #include #include #include -#include +#include #ifdef CONFIG_LTTNG_HAS_LIST_IRQ #include @@ -69,18 +55,99 @@ #define LTTNG_INSTRUMENTATION #include -DEFINE_TRACE(lttng_statedump_block_device); -DEFINE_TRACE(lttng_statedump_end); -DEFINE_TRACE(lttng_statedump_interrupt); -DEFINE_TRACE(lttng_statedump_file_descriptor); -DEFINE_TRACE(lttng_statedump_start); -DEFINE_TRACE(lttng_statedump_process_state); -DEFINE_TRACE(lttng_statedump_network_interface); +LTTNG_DEFINE_TRACE(lttng_statedump_block_device, + TP_PROTO(struct lttng_session *session, + dev_t dev, const char *diskname), + TP_ARGS(session, dev, diskname)); + +LTTNG_DEFINE_TRACE(lttng_statedump_end, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session)); + +LTTNG_DEFINE_TRACE(lttng_statedump_interrupt, + TP_PROTO(struct lttng_session *session, + unsigned int irq, const char *chip_name, + struct irqaction *action), + TP_ARGS(session, irq, chip_name, action)); + +LTTNG_DEFINE_TRACE(lttng_statedump_file_descriptor, + TP_PROTO(struct lttng_session *session, + struct files_struct *files, + int fd, const char *filename, + unsigned int flags, fmode_t fmode), + TP_ARGS(session, files, fd, filename, flags, fmode)); + +LTTNG_DEFINE_TRACE(lttng_statedump_start, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session)); + +LTTNG_DEFINE_TRACE(lttng_statedump_process_state, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + int type, int mode, int submode, int status, + struct files_struct *files), + TP_ARGS(session, p, type, mode, submode, status, files)); + +LTTNG_DEFINE_TRACE(lttng_statedump_process_pid_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct pid_namespace *pid_ns), + TP_ARGS(session, p, pid_ns)); + +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) +LTTNG_DEFINE_TRACE(lttng_statedump_process_cgroup_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct cgroup_namespace *cgroup_ns), + TP_ARGS(session, p, cgroup_ns)); +#endif + +LTTNG_DEFINE_TRACE(lttng_statedump_process_ipc_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct ipc_namespace *ipc_ns), + TP_ARGS(session, p, ipc_ns)); + +#ifndef LTTNG_MNT_NS_MISSING_HEADER +LTTNG_DEFINE_TRACE(lttng_statedump_process_mnt_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct mnt_namespace *mnt_ns), + TP_ARGS(session, p, mnt_ns)); +#endif + +LTTNG_DEFINE_TRACE(lttng_statedump_process_net_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct net *net_ns), + TP_ARGS(session, p, net_ns)); + +LTTNG_DEFINE_TRACE(lttng_statedump_process_user_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct user_namespace *user_ns), + TP_ARGS(session, p, user_ns)); + +LTTNG_DEFINE_TRACE(lttng_statedump_process_uts_ns, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + struct uts_namespace *uts_ns), + TP_ARGS(session, p, uts_ns)); + +LTTNG_DEFINE_TRACE(lttng_statedump_network_interface, + TP_PROTO(struct lttng_session *session, + struct net_device *dev, struct in_ifaddr *ifa), + TP_ARGS(session, dev, ifa)); + +#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY +LTTNG_DEFINE_TRACE(lttng_statedump_cpu_topology, + TP_PROTO(struct lttng_session *session, struct cpuinfo_x86 *c), + TP_ARGS(session, c)); +#endif struct lttng_fd_ctx { char *page; struct lttng_session *session; - struct task_struct *p; struct files_struct *files; }; @@ -121,6 +188,61 @@ enum lttng_process_status { LTTNG_DEAD = 7, }; + +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,11,0)) + +#define LTTNG_PART_STRUCT_TYPE struct block_device + +static +int lttng_get_part_name(struct gendisk *disk, struct block_device *part, char *name_buf) +{ + const char *p; + + p = bdevname(part, name_buf); + if (!p) + return -ENOSYS; + + return 0; +} + +static +dev_t lttng_get_part_devt(struct block_device *part) +{ + return part->bd_dev; +} + +#else + +#define LTTNG_PART_STRUCT_TYPE struct hd_struct + +static +int lttng_get_part_name(struct gendisk *disk, struct hd_struct *part, char *name_buf) +{ + const char *p; + struct block_device bdev; + + /* + * Create a partial 'struct blockdevice' to use + * 'bdevname()' which is a simple wrapper over + * 'disk_name()' but has the honor to be EXPORT_SYMBOL. + */ + bdev.bd_disk = disk; + bdev.bd_part = part; + + p = bdevname(&bdev, name_buf); + if (!p) + return -ENOSYS; + + return 0; +} + +static +dev_t lttng_get_part_devt(struct hd_struct *part) +{ + return part_devt(part); +} +#endif + static int lttng_enumerate_block_devices(struct lttng_session *session) { @@ -128,19 +250,23 @@ int lttng_enumerate_block_devices(struct lttng_session *session) struct device_type *ptr_disk_type; struct class_dev_iter iter; struct device *dev; + int ret = 0; ptr_block_class = wrapper_get_block_class(); - if (!ptr_block_class) - return -ENOSYS; + if (!ptr_block_class) { + ret = -ENOSYS; + goto end; + } ptr_disk_type = wrapper_get_disk_type(); if (!ptr_disk_type) { - return -ENOSYS; + ret = -ENOSYS; + goto end; } class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type); while ((dev = class_dev_iter_next(&iter))) { struct disk_part_iter piter; struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part; + LTTNG_PART_STRUCT_TYPE *part; /* * Don't show empty devices or things that have been @@ -150,24 +276,32 @@ int lttng_enumerate_block_devices(struct lttng_session *session) (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) continue; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); - while ((part = disk_part_iter_next(&piter))) { + /* + * The original 'disk_part_iter_init' returns void, but our + * wrapper can fail to lookup the original symbol. + */ + if (wrapper_disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0) < 0) { + ret = -ENOSYS; + goto iter_exit; + } + + while ((part = wrapper_disk_part_iter_next(&piter))) { char name_buf[BDEVNAME_SIZE]; - char *p; - p = wrapper_disk_name(disk, part->partno, name_buf); - if (!p) { - disk_part_iter_exit(&piter); - class_dev_iter_exit(&iter); - return -ENOSYS; + if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { + wrapper_disk_part_iter_exit(&piter); + ret = -ENOSYS; + goto iter_exit; } trace_lttng_statedump_block_device(session, - part_devt(part), name_buf); + lttng_get_part_devt(part), name_buf); } - disk_part_iter_exit(&piter); + wrapper_disk_part_iter_exit(&piter); } +iter_exit: class_dev_iter_exit(&iter); - return 0; +end: + return ret; } #ifdef CONFIG_INET @@ -245,52 +379,50 @@ int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) /* Make sure we give at least some info */ spin_lock(&dentry->d_lock); - trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, - dentry->d_name.name, flags, file->f_mode); + trace_lttng_statedump_file_descriptor(ctx->session, + ctx->files, fd, dentry->d_name.name, flags, + file->f_mode); spin_unlock(&dentry->d_lock); goto end; } - trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, s, - flags, file->f_mode); + trace_lttng_statedump_file_descriptor(ctx->session, + ctx->files, fd, s, flags, file->f_mode); end: return 0; } +/* Called with task lock held. */ static -void lttng_enumerate_task_fd(struct lttng_session *session, - struct task_struct *p, char *tmp) +void lttng_enumerate_files(struct lttng_session *session, + struct files_struct *files, + char *tmp) { - struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .p = p }; - struct files_struct *files; + struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, }; - task_lock(p); - files = p->files; - if (!files) - goto end; - ctx.files = files; lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx); -end: - task_unlock(p); } +#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY static -int lttng_enumerate_file_descriptors(struct lttng_session *session) +int lttng_enumerate_cpu_topology(struct lttng_session *session) { - struct task_struct *p; - char *tmp; + int cpu; + const cpumask_t *cpumask = cpu_possible_mask; - tmp = (char *) __get_free_page(GFP_KERNEL); - if (!tmp) - return -ENOMEM; + for (cpu = cpumask_first(cpumask); cpu < nr_cpu_ids; + cpu = cpumask_next(cpu, cpumask)) { + trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu)); + } - /* Enumerate active file descriptors */ - rcu_read_lock(); - for_each_process(p) - lttng_enumerate_task_fd(session, p, tmp); - rcu_read_unlock(); - free_page((unsigned long) tmp); return 0; } +#else +static +int lttng_enumerate_cpu_topology(struct lttng_session *session) +{ + return 0; +} +#endif #if 0 /* @@ -342,10 +474,6 @@ int lttng_enumerate_vm_maps(struct lttng_session *session) #ifdef CONFIG_LTTNG_HAS_LIST_IRQ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)) -#define irq_desc_get_chip(desc) get_irq_desc_chip(desc) -#endif - static int lttng_list_interrupts(struct lttng_session *session) { @@ -361,12 +489,12 @@ int lttng_list_interrupts(struct lttng_session *session) irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; local_irq_save(flags); - wrapper_desc_spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); for (action = desc->action; action; action = action->next) { trace_lttng_statedump_interrupt(session, irq, irq_chip_name, action); } - wrapper_desc_spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); local_irq_restore(flags); } return 0; @@ -381,6 +509,10 @@ int lttng_list_interrupts(struct lttng_session *session) #endif /* + * Statedump the task's namespaces using the proc filesystem inode number as + * the unique identifier. The user and pid ns are nested and will be dumped + * recursively. + * * Called with task lock held. */ static @@ -391,23 +523,87 @@ void lttng_statedump_process_ns(struct lttng_session *session, enum lttng_execution_submode submode, enum lttng_process_status status) { + struct nsproxy *proxy; struct pid_namespace *pid_ns; + struct user_namespace *user_ns; + /* + * The pid and user namespaces are special, they are nested and + * accessed with specific functions instead of the nsproxy struct + * like the other namespaces. + */ pid_ns = task_active_pid_ns(p); do { - trace_lttng_statedump_process_state(session, - p, type, mode, submode, status, pid_ns); - pid_ns = pid_ns->parent; + trace_lttng_statedump_process_pid_ns(session, p, pid_ns); + pid_ns = pid_ns ? pid_ns->parent : NULL; } while (pid_ns); + + + user_ns = task_cred_xxx(p, user_ns); + do { + trace_lttng_statedump_process_user_ns(session, p, user_ns); + /* + * trace_lttng_statedump_process_user_ns() internally + * checks whether user_ns is NULL. While this does not + * appear to be a possible return value for + * task_cred_xxx(), err on the safe side and check + * for NULL here as well to be consistent with the + * paranoid behavior of + * trace_lttng_statedump_process_user_ns(). + */ + user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL; + } while (user_ns); + + /* + * Back and forth on locking strategy within Linux upstream for nsproxy. + * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 + * "namespaces: Use task_lock and not rcu to protect nsproxy" + * for details. + */ +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ + LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ + LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ + LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) + proxy = p->nsproxy; +#else + rcu_read_lock(); + proxy = task_nsproxy(p); +#endif + if (proxy) { +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) + trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns); +#endif + trace_lttng_statedump_process_ipc_ns(session, p, proxy->ipc_ns); +#ifndef LTTNG_MNT_NS_MISSING_HEADER + trace_lttng_statedump_process_mnt_ns(session, p, proxy->mnt_ns); +#endif + trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns); + trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns); + } +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ + LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ + LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ + LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) + /* (nothing) */ +#else + rcu_read_unlock(); +#endif } static int lttng_enumerate_process_states(struct lttng_session *session) { struct task_struct *g, *p; + char *tmp; + + tmp = (char *) __get_free_page(GFP_KERNEL); + if (!tmp) + return -ENOMEM; rcu_read_lock(); for_each_process(g) { + struct files_struct *prev_files = NULL; + p = g; do { enum lttng_execution_mode mode = @@ -416,6 +612,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) LTTNG_UNKNOWN; enum lttng_process_status status; enum lttng_thread_type type; + struct files_struct *files; task_lock(p); if (p->exit_state == EXIT_ZOMBIE) @@ -450,13 +647,31 @@ int lttng_enumerate_process_states(struct lttng_session *session) type = LTTNG_USER_THREAD; else type = LTTNG_KERNEL_THREAD; + files = p->files; + + trace_lttng_statedump_process_state(session, + p, type, mode, submode, status, files); lttng_statedump_process_ns(session, p, type, mode, submode, status); + /* + * As an optimisation for the common case, do not + * repeat information for the same files_struct in + * two consecutive threads. This is the common case + * for threads sharing the same fd table. RCU guarantees + * that the same files_struct pointer is not re-used + * throughout processes/threads iteration. + */ + if (files && files != prev_files) { + lttng_enumerate_files(session, files, tmp); + prev_files = files; + } task_unlock(p); } while_each_thread(g, p); } rcu_read_unlock(); + free_page((unsigned long) tmp); + return 0; } @@ -475,9 +690,6 @@ int do_lttng_statedump(struct lttng_session *session) trace_lttng_statedump_start(session); ret = lttng_enumerate_process_states(session); - if (ret) - return ret; - ret = lttng_enumerate_file_descriptors(session); if (ret) return ret; /* @@ -502,6 +714,9 @@ int do_lttng_statedump(struct lttng_session *session) default: return ret; } + ret = lttng_enumerate_cpu_topology(session); + if (ret) + return ret; /* TODO lttng_dump_idt_table(session); */ /* TODO lttng_dump_softirq_vec(session); */