X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=lttng-statedump-impl.c;h=fef97812f2b87f2eb89e74783981ddc45441b1f2;hb=refs%2Fheads%2Fstable-2.11;hp=cfcf4febb97b3ceff2a2bcd7c28c53f2fe17cc3d;hpb=c337ddc219f608d4d35f461bdc9d2246324d6708;p=lttng-modules.git diff --git a/lttng-statedump-impl.c b/lttng-statedump-impl.c index cfcf4feb..fef97812 100644 --- a/lttng-statedump-impl.c +++ b/lttng-statedump-impl.c @@ -1,4 +1,7 @@ -/* +/* SPDX-License-Identifier: (GPL-2.0 or LGPL-2.1) + * + * lttng-statedump.c + * * Linux Trace Toolkit Next Generation Kernel State Dump * * Copyright 2005 Jean-Hugues Deschenes @@ -8,8 +11,6 @@ * Eric Clement: Add listing of network IP interface * 2006, 2007 Mathieu Desnoyers Fix kernel threads * Various updates - * - * Dual LGPL v2.1/GPL v2 license. */ #include @@ -25,25 +26,78 @@ #include #include #include -#include #include -#include #include #include #include - -#include "lttng-events.h" -#include "wrapper/irqdesc.h" - -#ifdef CONFIG_GENERIC_HARDIRQS +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_LTTNG_HAS_LIST_IRQ #include #endif /* Define the tracepoints, but do not build the probes */ #define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module +#define TRACE_INCLUDE_PATH instrumentation/events/lttng-module #define TRACE_INCLUDE_FILE lttng-statedump -#include "instrumentation/events/lttng-module/lttng-statedump.h" +#define LTTNG_INSTRUMENTATION +#include + +LTTNG_DEFINE_TRACE(lttng_statedump_block_device, + TP_PROTO(struct lttng_session *session, + dev_t dev, const char *diskname), + TP_ARGS(session, dev, diskname)); + +LTTNG_DEFINE_TRACE(lttng_statedump_end, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session)); + +LTTNG_DEFINE_TRACE(lttng_statedump_interrupt, + TP_PROTO(struct lttng_session *session, + unsigned int irq, const char *chip_name, + struct irqaction *action), + TP_ARGS(session, irq, 
chip_name, action)); + +LTTNG_DEFINE_TRACE(lttng_statedump_file_descriptor, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, int fd, const char *filename, + unsigned int flags, fmode_t fmode), + TP_ARGS(session, p, fd, filename, flags, fmode)); + +LTTNG_DEFINE_TRACE(lttng_statedump_start, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session)); + +LTTNG_DEFINE_TRACE(lttng_statedump_process_state, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + int type, int mode, int submode, int status, + struct pid_namespace *pid_ns), + TP_ARGS(session, p, type, mode, submode, status, pid_ns)); + +LTTNG_DEFINE_TRACE(lttng_statedump_network_interface, + TP_PROTO(struct lttng_session *session, + struct net_device *dev, struct in_ifaddr *ifa), + TP_ARGS(session, dev, ifa)); + +struct lttng_fd_ctx { + char *page; + struct lttng_session *session; + struct task_struct *p; + struct files_struct *files; +}; /* * Protected by the trace lock. @@ -82,7 +136,156 @@ enum lttng_process_status { LTTNG_DEAD = 7, }; + +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,11,0)) + +#define LTTNG_PART_STRUCT_TYPE struct block_device + +static +int lttng_get_part_name(struct gendisk *disk, struct block_device *part, char *name_buf) +{ + const char *p; + + p = bdevname(part, name_buf); + if (!p) + return -ENOSYS; + + return 0; +} + +static +dev_t lttng_get_part_devt(struct block_device *part) +{ + return part->bd_dev; +} + +#else + +#define LTTNG_PART_STRUCT_TYPE struct hd_struct + +static +int lttng_get_part_name(struct gendisk *disk, struct hd_struct *part, char *name_buf) +{ + const char *p; + struct block_device bdev; + + /* + * Create a partial 'struct block_device' to use + * 'bdevname()' which is a simple wrapper over + * 'disk_name()' but, unlike it, is exported with EXPORT_SYMBOL. 
+ */ + bdev.bd_disk = disk; + bdev.bd_part = part; + + p = bdevname(&bdev, name_buf); + if (!p) + return -ENOSYS; + + return 0; +} + +static +dev_t lttng_get_part_devt(struct hd_struct *part) +{ + return part_devt(part); +} +#endif + +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,12,0)) +static +int lttng_statedump_each_block_device(struct lttng_session *session, struct gendisk *disk) +{ + struct block_device *part; + unsigned long idx; + int ret = 0; + + /* Include partition 0 */ + idx = 0; + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, part) { + char name_buf[BDEVNAME_SIZE]; + + /* Skip empty partitions; bdevs that are not partitions are kept. */ + if (bdev_is_partition(part) && !bdev_nr_sectors(part)) + continue; + + if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { + ret = -ENOSYS; + goto end; + } + trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), + name_buf); + } +end: + rcu_read_unlock(); + return ret; +} +#else +static +int lttng_statedump_each_block_device(struct lttng_session *session, struct gendisk *disk) +{ + struct disk_part_iter piter; + LTTNG_PART_STRUCT_TYPE *part; + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + + while ((part = disk_part_iter_next(&piter))) { + char name_buf[BDEVNAME_SIZE]; + + if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { + disk_part_iter_exit(&piter); + return -ENOSYS; + } + trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), + name_buf); + } + disk_part_iter_exit(&piter); + + return 0; +} +#endif + +static +int lttng_enumerate_block_devices(struct lttng_session *session) +{ + struct class *ptr_block_class; + struct device_type *ptr_disk_type; + struct class_dev_iter iter; + struct device *dev; + int ret = 0; + + ptr_block_class = wrapper_get_block_class(); + if (!ptr_block_class) { + ret = -ENOSYS; + goto end; + } + ptr_disk_type = wrapper_get_disk_type(); + if (!ptr_disk_type) { + ret = -ENOSYS; + goto end; + } + class_dev_iter_init(&iter, 
ptr_block_class, NULL, ptr_disk_type); + while ((dev = class_dev_iter_next(&iter))) { + struct gendisk *disk = dev_to_disk(dev); + + /* + * Don't show empty devices or things that have been + * suppressed + */ + if (get_capacity(disk) == 0 || + (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) + continue; + + ret = lttng_statedump_each_block_device(session, disk); + } + class_dev_iter_exit(&iter); +end: + return ret; +} + #ifdef CONFIG_INET + static void lttng_enumerate_device(struct lttng_session *session, struct net_device *dev) @@ -126,34 +329,61 @@ int lttng_enumerate_network_ip_interface(struct lttng_session *session) } #endif /* CONFIG_INET */ +static +int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) +{ + const struct lttng_fd_ctx *ctx = p; + const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE); + unsigned int flags = file->f_flags; + struct fdtable *fdt; + + /* + * We don't expose kernel internal flags, only userspace-visible + * flags. + */ + flags &= ~FMODE_NONOTIFY; + fdt = files_fdtable(ctx->files); + /* + * We need to check here again whether fd is within the fdt + * max_fds range, because we might be seeing a different + * files_fdtable() than iterate_fd(), assuming only RCU is + * protecting the read. In reality, iterate_fd() holds + * file_lock, which should ensure the fdt does not change while + * the lock is taken, but we are not aware whether this is + * guaranteed or not, so play safe. 
+ */ + if (fd < fdt->max_fds && lttng_close_on_exec(fd, fdt)) + flags |= O_CLOEXEC; + if (IS_ERR(s)) { + struct dentry *dentry = file->f_path.dentry; + + /* Make sure we give at least some info */ + spin_lock(&dentry->d_lock); + trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, + dentry->d_name.name, flags, file->f_mode); + spin_unlock(&dentry->d_lock); + goto end; + } + trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, s, + flags, file->f_mode); +end: + return 0; +} static void lttng_enumerate_task_fd(struct lttng_session *session, struct task_struct *p, char *tmp) { - struct fdtable *fdt; - struct file *filp; - unsigned int i; - const unsigned char *path; + struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .p = p }; + struct files_struct *files; task_lock(p); - if (!p->files) - goto unlock_task; - spin_lock(&p->files->file_lock); - fdt = files_fdtable(p->files); - for (i = 0; i < fdt->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (!filp) - continue; - path = d_path(&filp->f_path, tmp, PAGE_SIZE); - /* Make sure we give at least some info */ - trace_lttng_statedump_file_descriptor(session, p, i, - IS_ERR(path) ? - filp->f_dentry->d_name.name : - path); - } - spin_unlock(&p->files->file_lock); -unlock_task: + files = p->files; + if (!files) + goto end; + ctx.files = files; + lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx); +end: task_unlock(p); } @@ -161,7 +391,11 @@ static int lttng_enumerate_file_descriptors(struct lttng_session *session) { struct task_struct *p; - char *tmp = (char *) __get_free_page(GFP_KERNEL); + char *tmp; + + tmp = (char *) __get_free_page(GFP_KERNEL); + if (!tmp) + return -ENOMEM; /* Enumerate active file descriptors */ rcu_read_lock(); @@ -172,6 +406,12 @@ int lttng_enumerate_file_descriptors(struct lttng_session *session) return 0; } +#if 0 +/* + * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section + * (scheduling in atomic). 
Normally, the tasklist lock protects this kind of + * iteration, but it is not exported to modules. + */ static void lttng_enumerate_task_vm_maps(struct lttng_session *session, struct task_struct *p) @@ -190,7 +430,7 @@ void lttng_enumerate_task_vm_maps(struct lttng_session *session, down_read(&mm->mmap_sem); while (map) { if (map->vm_file) - ino = map->vm_file->f_dentry->d_inode->i_ino; + ino = map->vm_file->lttng_f_dentry->d_inode->i_ino; else ino = 0; trace_lttng_statedump_vm_map(session, p, map, ino); @@ -212,10 +452,16 @@ int lttng_enumerate_vm_maps(struct lttng_session *session) rcu_read_unlock(); return 0; } +#endif + +#ifdef CONFIG_LTTNG_HAS_LIST_IRQ + +#if (LTTNG_LINUX_VERSION_CODE < LTTNG_KERNEL_VERSION(2,6,39)) +#define irq_desc_get_chip(desc) get_irq_desc_chip(desc) +#endif -#ifdef CONFIG_GENERIC_HARDIRQS static -void lttng_list_interrupts(struct lttng_session *session) +int lttng_list_interrupts(struct lttng_session *session) { unsigned int irq; unsigned long flags = 0; @@ -229,23 +475,46 @@ void lttng_list_interrupts(struct lttng_session *session) irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; local_irq_save(flags); - raw_spin_lock(&desc->lock); + wrapper_desc_spin_lock(&desc->lock); for (action = desc->action; action; action = action->next) { trace_lttng_statedump_interrupt(session, irq, irq_chip_name, action); } - raw_spin_unlock(&desc->lock); + wrapper_desc_spin_unlock(&desc->lock); local_irq_restore(flags); } + return 0; #undef irq_to_desc } #else static inline -void list_interrupts(struct lttng_session *session) +int lttng_list_interrupts(struct lttng_session *session) { + return 0; } #endif +/* + * Called with task lock held. 
+ */ +static +void lttng_statedump_process_ns(struct lttng_session *session, + struct task_struct *p, + enum lttng_thread_type type, + enum lttng_execution_mode mode, + enum lttng_execution_submode submode, + enum lttng_process_status status) +{ + struct pid_namespace *pid_ns; + + pid_ns = task_active_pid_ns(p); + do { + trace_lttng_statedump_process_state(session, + p, type, mode, submode, status, pid_ns); + pid_ns = pid_ns ? pid_ns->parent : NULL; + } while (pid_ns); +} + static int lttng_enumerate_process_states(struct lttng_session *session) { @@ -267,7 +536,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) status = LTTNG_ZOMBIE; else if (p->exit_state == EXIT_DEAD) status = LTTNG_DEAD; - else if (p->state == TASK_RUNNING) { + else if (lttng_task_is_running(p)) { /* Is this a forked child that has not run yet? */ if (list_empty(&p->rt.run_list)) status = LTTNG_WAIT_FORK; @@ -278,7 +547,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) * was really running at this time. 
*/ status = LTTNG_WAIT_CPU; - } else if (p->state & + } else if (lttng_get_task_state(p) & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { /* Task is waiting for something to complete */ status = LTTNG_WAIT; @@ -295,7 +564,7 @@ int lttng_enumerate_process_states(struct lttng_session *session) type = LTTNG_USER_THREAD; else type = LTTNG_KERNEL_THREAD; - trace_lttng_statedump_process_state(session, + lttng_statedump_process_ns(session, p, type, mode, submode, status); task_unlock(p); } while_each_thread(g, p); @@ -316,15 +585,37 @@ void lttng_statedump_work_func(struct work_struct *work) static int do_lttng_statedump(struct lttng_session *session) { - int cpu; + int cpu, ret; - printk(KERN_DEBUG "LTT state dump thread start\n"); trace_lttng_statedump_start(session); - lttng_enumerate_process_states(session); - lttng_enumerate_file_descriptors(session); - lttng_enumerate_vm_maps(session); - lttng_list_interrupts(session); - lttng_enumerate_network_ip_interface(session); + ret = lttng_enumerate_process_states(session); + if (ret) + return ret; + ret = lttng_enumerate_file_descriptors(session); + if (ret) + return ret; + /* + * FIXME + * ret = lttng_enumerate_vm_maps(session); + * if (ret) + * return ret; + */ + ret = lttng_list_interrupts(session); + if (ret) + return ret; + ret = lttng_enumerate_network_ip_interface(session); + if (ret) + return ret; + ret = lttng_enumerate_block_devices(session); + switch (ret) { + case 0: + break; + case -ENOSYS: + printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n"); + break; + default: + return ret; + } /* TODO lttng_dump_idt_table(session); */ /* TODO lttng_dump_softirq_vec(session); */ @@ -343,10 +634,9 @@ int do_lttng_statedump(struct lttng_session *session) schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); } /* Wait for all threads to run */ - __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) != 0)); + __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0)); 
put_online_cpus(); /* Our work is done */ - printk(KERN_DEBUG "LTT state dump end\n"); trace_lttng_statedump_end(session); return 0; } @@ -356,11 +646,36 @@ int do_lttng_statedump(struct lttng_session *session) */ int lttng_statedump_start(struct lttng_session *session) { - printk(KERN_DEBUG "LTTng: state dump begin\n"); return do_lttng_statedump(session); } EXPORT_SYMBOL_GPL(lttng_statedump_start); +static +int __init lttng_statedump_init(void) +{ + /* + * Allow module to load even if the fixup cannot be done. This + * will allow a seamless transition when the underlying issue fix + * is merged into the Linux kernel, and when tracepoint.c + * "tracepoint_module_notify" is turned into a static function. + */ + (void) wrapper_lttng_fixup_sig(THIS_MODULE); + return 0; +} + +module_init(lttng_statedump_init); + +static +void __exit lttng_statedump_exit(void) +{ +} + +module_exit(lttng_statedump_exit); + MODULE_LICENSE("GPL and additional rights"); MODULE_AUTHOR("Jean-Hugues Deschenes"); -MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Statedump"); +MODULE_DESCRIPTION("LTTng statedump provider"); +MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "." + __stringify(LTTNG_MODULES_MINOR_VERSION) "." + __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION) + LTTNG_MODULES_EXTRAVERSION);