X-Git-Url: https://git.liburcu.org/?a=blobdiff_plain;f=src%2Flttng-statedump-impl.c;h=2b42783a2c0617ae93b74a894c488a3f0f16fe80;hb=HEAD;hp=b3453bf5a8f21891c5ba4b75bd271317c96d3af4;hpb=5f4c791e2ad2c814101ccdb500e65543f2792c41;p=lttng-modules.git diff --git a/src/lttng-statedump-impl.c b/src/lttng-statedump-impl.c index b3453bf5..5ea790f5 100644 --- a/src/lttng-statedump-impl.c +++ b/src/lttng-statedump-impl.c @@ -22,31 +22,27 @@ #include #include #include +#include #include -#include #include #include -#include #include #include #include #include #include +#include + #include #include +#include +#include #include #include -#include -#include #include -#include -#include -#include - -#ifdef CONFIG_LTTNG_HAS_LIST_IRQ -#include -#endif +#include +#include /* Define the tracepoints, but do not build the probes */ #define CREATE_TRACE_POINTS @@ -56,104 +52,104 @@ #include LTTNG_DEFINE_TRACE(lttng_statedump_block_device, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, dev_t dev, const char *diskname), TP_ARGS(session, dev, diskname)); LTTNG_DEFINE_TRACE(lttng_statedump_end, - TP_PROTO(struct lttng_session *session), + TP_PROTO(struct lttng_kernel_session *session), TP_ARGS(session)); LTTNG_DEFINE_TRACE(lttng_statedump_interrupt, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, unsigned int irq, const char *chip_name, struct irqaction *action), TP_ARGS(session, irq, chip_name, action)); LTTNG_DEFINE_TRACE(lttng_statedump_file_descriptor, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct files_struct *files, int fd, const char *filename, unsigned int flags, fmode_t fmode), TP_ARGS(session, files, fd, filename, flags, fmode)); LTTNG_DEFINE_TRACE(lttng_statedump_start, - TP_PROTO(struct lttng_session *session), + TP_PROTO(struct lttng_kernel_session *session), TP_ARGS(session)); LTTNG_DEFINE_TRACE(lttng_statedump_process_state, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, int type, int mode, int submode, int status, struct files_struct *files), TP_ARGS(session, p, type, mode, submode, status, files)); LTTNG_DEFINE_TRACE(lttng_statedump_process_pid_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct pid_namespace *pid_ns), TP_ARGS(session, p, pid_ns)); #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) LTTNG_DEFINE_TRACE(lttng_statedump_process_cgroup_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct cgroup_namespace *cgroup_ns), TP_ARGS(session, p, cgroup_ns)); #endif LTTNG_DEFINE_TRACE(lttng_statedump_process_ipc_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct ipc_namespace *ipc_ns), TP_ARGS(session, p, ipc_ns)); #ifndef LTTNG_MNT_NS_MISSING_HEADER LTTNG_DEFINE_TRACE(lttng_statedump_process_mnt_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct mnt_namespace *mnt_ns), TP_ARGS(session, p, mnt_ns)); #endif LTTNG_DEFINE_TRACE(lttng_statedump_process_net_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct net *net_ns), TP_ARGS(session, p, net_ns)); LTTNG_DEFINE_TRACE(lttng_statedump_process_user_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct user_namespace *user_ns), TP_ARGS(session, p, user_ns)); LTTNG_DEFINE_TRACE(lttng_statedump_process_uts_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct uts_namespace *uts_ns), TP_ARGS(session, p, uts_ns)); LTTNG_DEFINE_TRACE(lttng_statedump_process_time_ns, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct task_struct *p, struct time_namespace *time_ns), TP_ARGS(session, p, time_ns)); LTTNG_DEFINE_TRACE(lttng_statedump_network_interface, - TP_PROTO(struct lttng_session *session, + TP_PROTO(struct lttng_kernel_session *session, struct net_device *dev, struct in_ifaddr *ifa), TP_ARGS(session, dev, ifa)); #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY LTTNG_DEFINE_TRACE(lttng_statedump_cpu_topology, - TP_PROTO(struct lttng_session *session, struct cpuinfo_x86 *c), + TP_PROTO(struct lttng_kernel_session *session, struct cpuinfo_x86 *c), TP_ARGS(session, c)); #endif struct lttng_fd_ctx { char *page; - struct lttng_session *session; + struct lttng_kernel_session *session; struct files_struct *files; }; @@ -195,7 +191,30 @@ enum lttng_process_status { }; -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,11,0)) +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,0,0) || \ + LTTNG_RHEL_KERNEL_RANGE(5,14,0,163,0,0, 5,15,0,0,0,0)) + +#define LTTNG_PART_STRUCT_TYPE struct block_device + +static +int lttng_get_part_name(struct gendisk *disk, struct block_device *part, char *name_buf) +{ + int ret; + + ret = snprintf(name_buf, BDEVNAME_SIZE, "%pg", part); + if (ret < 0 || ret >= BDEVNAME_SIZE) + return -ENOSYS; + + return 0; +} + +static +dev_t lttng_get_part_devt(struct block_device *part) +{ + return part->bd_dev; +} + +#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,11,0)) #define LTTNG_PART_STRUCT_TYPE struct block_device @@ -249,57 +268,103 @@ dev_t lttng_get_part_devt(struct hd_struct *part) } #endif +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,12,0)) static -int lttng_enumerate_block_devices(struct lttng_session *session) +int lttng_statedump_each_block_device(struct lttng_kernel_session *session, struct gendisk *disk) +{ + struct block_device *part; + unsigned long idx; + int ret = 0; + + /* Include partition 0 */ + idx = 0; + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, part) { + char name_buf[BDEVNAME_SIZE]; + + /* Exclude non-partitions bdev and empty partitions. */ + if (bdev_is_partition(part) && !bdev_nr_sectors(part)) + continue; + + if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { + ret = -ENOSYS; + goto end; + } + trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), + name_buf); + } +end: + rcu_read_unlock(); + return ret; +} +#else +static +int lttng_statedump_each_block_device(struct lttng_kernel_session *session, struct gendisk *disk) +{ + struct disk_part_iter piter; + LTTNG_PART_STRUCT_TYPE *part; + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + + while ((part = disk_part_iter_next(&piter))) { + char name_buf[BDEVNAME_SIZE]; + + if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { + disk_part_iter_exit(&piter); + return -ENOSYS; + } + trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), + name_buf); + } + disk_part_iter_exit(&piter); + + return 0; +} +#endif + +static +int lttng_enumerate_block_devices(struct lttng_kernel_session *session) { struct class *ptr_block_class; struct device_type *ptr_disk_type; struct class_dev_iter iter; struct device *dev; + int ret = 0; ptr_block_class = wrapper_get_block_class(); - if (!ptr_block_class) - return -ENOSYS; + if (!ptr_block_class) { + ret = -ENOSYS; + goto end; + } ptr_disk_type = wrapper_get_disk_type(); if (!ptr_disk_type) { - return -ENOSYS; + ret = -ENOSYS; + goto end; } class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type); while ((dev = class_dev_iter_next(&iter))) { - struct disk_part_iter piter; struct gendisk *disk = dev_to_disk(dev); - LTTNG_PART_STRUCT_TYPE *part; /* * Don't show empty devices or things that have been * suppressed */ if (get_capacity(disk) == 0 || - (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) + (disk->flags & LTTNG_GENHD_FL_HIDDEN)) continue; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); - while ((part = disk_part_iter_next(&piter))) { - char name_buf[BDEVNAME_SIZE]; - - if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { - disk_part_iter_exit(&piter); - class_dev_iter_exit(&iter); - return -ENOSYS; - } - trace_lttng_statedump_block_device(session, - lttng_get_part_devt(part), name_buf); - } - disk_part_iter_exit(&piter); + ret = lttng_statedump_each_block_device(session, disk); } class_dev_iter_exit(&iter); - return 0; +end: + return ret; } #ifdef CONFIG_INET static -void lttng_enumerate_device(struct lttng_session *session, +void lttng_enumerate_device(struct lttng_kernel_session *session, struct net_device *dev) { struct in_device *in_dev; @@ -321,8 +386,22 @@ void lttng_enumerate_device(struct lttng_session *session, } } +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,9,0)) +static +int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session) +{ + struct net_device *dev; + + rtnl_lock(); + for_each_netdev(&init_net, dev) + lttng_enumerate_device(session, dev); + rtnl_unlock(); + + return 0; +} +#else static -int lttng_enumerate_network_ip_interface(struct lttng_session *session) +int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session) { struct net_device *dev; @@ -333,9 +412,10 @@ int lttng_enumerate_network_ip_interface(struct lttng_session *session) return 0; } +#endif /* (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,9,0)) */ #else /* CONFIG_INET */ static inline -int lttng_enumerate_network_ip_interface(struct lttng_session *session) +int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session) { return 0; } @@ -355,17 +435,22 @@ int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) */ flags &= ~FMODE_NONOTIFY; fdt = files_fdtable(ctx->files); + /* - * We need to check here again whether fd is within the fdt - * max_fds range, because we might be seeing a different - * files_fdtable() than iterate_fd(), assuming only RCU is - * protecting the read. In reality, iterate_fd() holds - * file_lock, which should ensure the fdt does not change while - * the lock is taken, but we are not aware whether this is - * guaranteed or not, so play safe. + * The fdt should only grow and iterate_fd() holds file_lock, which + * should ensure the fdt does not change while the lock is taken but be + * cautious and check anyway. */ - if (fd < fdt->max_fds && lttng_close_on_exec(fd, fdt)) + if (WARN_ON_ONCE(fd >= fdt->max_fds)) + return 0; + + if (lttng_close_on_exec(fd, ctx->files)) flags |= O_CLOEXEC; + + /* + * If d_path() failed to get a full path for the file, use the dentry + * name instead to at least get a filename. + */ if (IS_ERR(s)) { struct dentry *dentry = file->f_path.dentry; @@ -385,18 +470,18 @@ end: /* Called with task lock held. */ static -void lttng_enumerate_files(struct lttng_session *session, +void lttng_enumerate_files(struct lttng_kernel_session *session, struct files_struct *files, char *tmp) { struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, }; - lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx); + iterate_fd(files, 0, lttng_dump_one_fd, &ctx); } #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY static -int lttng_enumerate_cpu_topology(struct lttng_session *session) +int lttng_enumerate_cpu_topology(struct lttng_kernel_session *session) { int cpu; const cpumask_t *cpumask = cpu_possible_mask; @@ -410,7 +495,7 @@ int lttng_enumerate_cpu_topology(struct lttng_session *session) } #else static -int lttng_enumerate_cpu_topology(struct lttng_session *session) +int lttng_enumerate_cpu_topology(struct lttng_kernel_session *session) { return 0; } @@ -423,7 +508,7 @@ int lttng_enumerate_cpu_topology(struct lttng_session *session) * iteration, but it is not exported to modules. */ static -void lttng_enumerate_task_vm_maps(struct lttng_session *session, +void lttng_enumerate_task_vm_maps(struct lttng_kernel_session *session, struct task_struct *p) { struct mm_struct *mm; @@ -440,7 +525,7 @@ void lttng_enumerate_task_vm_maps(struct lttng_session *session, down_read(&mm->mmap_sem); while (map) { if (map->vm_file) - ino = map->vm_file->lttng_f_dentry->d_inode->i_ino; + ino = map->vm_file->f_path.dentry->d_inode->i_ino; else ino = 0; trace_lttng_statedump_vm_map(session, p, map, ino); @@ -452,7 +537,7 @@ void lttng_enumerate_task_vm_maps(struct lttng_session *session, } static -int lttng_enumerate_vm_maps(struct lttng_session *session) +int lttng_enumerate_vm_maps(struct lttng_kernel_session *session) { struct task_struct *p; @@ -464,10 +549,8 @@ int lttng_enumerate_vm_maps(struct lttng_session *session) } #endif -#ifdef CONFIG_LTTNG_HAS_LIST_IRQ - static -int lttng_list_interrupts(struct lttng_session *session) +int lttng_list_interrupts(struct lttng_kernel_session *session) { unsigned int irq; unsigned long flags = 0; @@ -492,13 +575,6 @@ int lttng_list_interrupts(struct lttng_session *session) return 0; #undef irq_to_desc } -#else -static inline -int lttng_list_interrupts(struct lttng_session *session) -{ - return 0; -} -#endif /* * Statedump the task's namespaces using the proc filesystem inode number as @@ -508,7 +584,7 @@ int lttng_list_interrupts(struct lttng_session *session) * Called with task lock held. */ static -void lttng_statedump_process_ns(struct lttng_session *session, +void lttng_statedump_process_ns(struct lttng_kernel_session *session, struct task_struct *p, enum lttng_thread_type type, enum lttng_execution_mode mode, @@ -543,24 +619,11 @@ void lttng_statedump_process_ns(struct lttng_session *session, * paranoid behavior of * trace_lttng_statedump_process_user_ns(). */ - user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL; + user_ns = user_ns ? user_ns->parent : NULL; } while (user_ns); - /* - * Back and forth on locking strategy within Linux upstream for nsproxy. - * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 - * "namespaces: Use task_lock and not rcu to protect nsproxy" - * for details. - */ -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ - LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ - LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ - LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) proxy = p->nsproxy; -#else - rcu_read_lock(); - proxy = task_nsproxy(p); -#endif + if (proxy) { #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns); @@ -571,22 +634,15 @@ void lttng_statedump_process_ns(struct lttng_session *session, #endif trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns); trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns); -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,6,0)) +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,6,0) || \ + LTTNG_RHEL_KERNEL_RANGE(4,18,0,305,0,0, 4,19,0,0,0,0)) trace_lttng_statedump_process_time_ns(session, p, proxy->time_ns); #endif } -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ - LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ - LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ - LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) - /* (nothing) */ -#else - rcu_read_unlock(); -#endif } static -int lttng_enumerate_process_states(struct lttng_session *session) +int lttng_enumerate_process_states(struct lttng_kernel_session *session) { struct task_struct *g, *p; char *tmp; @@ -614,7 +670,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) status = LTTNG_ZOMBIE; else if (p->exit_state == EXIT_DEAD) status = LTTNG_DEAD; - else if (p->state == TASK_RUNNING) { + else if (lttng_task_is_running(p)) { /* Is this a forked child that has not run yet? */ if (list_empty(&p->rt.run_list)) status = LTTNG_WAIT_FORK; @@ -625,7 +681,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) * was really running at this time. */ status = LTTNG_WAIT_CPU; - } else if (p->state & + } else if (lttng_get_task_state(p) & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { /* Task is waiting for something to complete */ status = LTTNG_WAIT; @@ -679,7 +735,7 @@ void lttng_statedump_work_func(struct work_struct *work) } static -int do_lttng_statedump(struct lttng_session *session) +int do_lttng_statedump(struct lttng_kernel_session *session) { int cpu, ret; @@ -723,7 +779,7 @@ int do_lttng_statedump(struct lttng_session *session) * is to guarantee that each CPU has been in a state where is was in * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). */ - get_online_cpus(); + lttng_cpus_read_lock(); atomic_set(&kernel_threads_to_run, num_online_cpus()); for_each_online_cpu(cpu) { INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func); @@ -731,7 +787,7 @@ int do_lttng_statedump(struct lttng_session *session) } /* Wait for all threads to run */ __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0)); - put_online_cpus(); + lttng_cpus_read_unlock(); /* Our work is done */ trace_lttng_statedump_end(session); return 0; @@ -740,7 +796,7 @@ int do_lttng_statedump(struct lttng_session *session) /* * Called with session mutex held. */ -int lttng_statedump_start(struct lttng_session *session) +int lttng_statedump_start(struct lttng_kernel_session *session) { return do_lttng_statedump(session); } @@ -749,13 +805,6 @@ EXPORT_SYMBOL_GPL(lttng_statedump_start); static int __init lttng_statedump_init(void) { - /* - * Allow module to load even if the fixup cannot be done. This - * will allow seemless transition when the underlying issue fix - * is merged into the Linux kernel, and when tracepoint.c - * "tracepoint_module_notify" is turned into a static function. - */ - (void) wrapper_lttng_fixup_sig(THIS_MODULE); return 0; }