/* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only)
 *
 * Linux Trace Toolkit Next Generation Kernel State Dump
 *
 * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca>
 * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * Changes:
 *	Eric Clement:                 Add listing of network IP interface
 *	2006, 2007 Mathieu Desnoyers  Fix kernel threads
 */
16 #include <linux/init.h>
17 #include <linux/module.h>
18 #include <linux/netlink.h>
19 #include <linux/inet.h>
21 #include <linux/kthread.h>
22 #include <linux/proc_fs.h>
23 #include <linux/file.h>
24 #include <linux/interrupt.h>
25 #include <linux/irqnr.h>
26 #include <linux/cpu.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/sched.h>
31 #include <linux/swap.h>
32 #include <linux/wait.h>
33 #include <linux/mutex.h>
34 #include <linux/device.h>
35 #include <linux/fdtable.h>
36 #include <linux/irq.h>
37 #include <linux/genhd.h>
39 #include <lttng-events.h>
40 #include <lttng-tracer.h>
42 /* Define the tracepoints, but do not build the probes */
43 #define CREATE_TRACE_POINTS
44 #define TRACE_INCLUDE_PATH instrumentation/events/lttng-module
45 #define TRACE_INCLUDE_FILE lttng-statedump
46 #define LTTNG_INSTRUMENTATION
47 #include <instrumentation/events/lttng-module/lttng-statedump.h>
49 DEFINE_TRACE(lttng_statedump_block_device
);
50 DEFINE_TRACE(lttng_statedump_end
);
51 DEFINE_TRACE(lttng_statedump_interrupt
);
52 DEFINE_TRACE(lttng_statedump_file_descriptor
);
53 DEFINE_TRACE(lttng_statedump_start
);
54 DEFINE_TRACE(lttng_statedump_process_state
);
55 DEFINE_TRACE(lttng_statedump_process_pid_ns
);
56 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
57 DEFINE_TRACE(lttng_statedump_process_cgroup_ns
);
59 DEFINE_TRACE(lttng_statedump_process_ipc_ns
);
60 #ifndef LTTNG_MNT_NS_MISSING_HEADER
61 DEFINE_TRACE(lttng_statedump_process_mnt_ns
);
63 DEFINE_TRACE(lttng_statedump_process_net_ns
);
64 DEFINE_TRACE(lttng_statedump_process_user_ns
);
65 DEFINE_TRACE(lttng_statedump_process_uts_ns
);
66 DEFINE_TRACE(lttng_statedump_network_interface
);
67 #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
68 DEFINE_TRACE(lttng_statedump_cpu_topology
);
/*
 * Context passed to the per-fd iterator callback (lttng_dump_one_fd)
 * through iterate_fd()'s opaque pointer.
 * NOTE(review): the struct tag and the "page" member are reconstructed
 * from the callback's use of ctx->page / ctx->session / ctx->files;
 * confirm against upstream.
 */
struct lttng_fd_ctx {
	char *page;			/* scratch page for d_path() output */
	struct lttng_session *session;	/* session receiving the events */
	struct files_struct *files;	/* fd table being enumerated */
};
78 * Protected by the trace lock.
80 static struct delayed_work cpu_work
[NR_CPUS
];
81 static DECLARE_WAIT_QUEUE_HEAD(statedump_wq
);
82 static atomic_t kernel_threads_to_run
;
/* Thread classification reported by the process-state statedump. */
enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,
	LTTNG_KERNEL_THREAD = 1,
};

/*
 * Execution mode of a thread at statedump time.
 * NOTE(review): only LTTNG_MODE_UNKNOWN = 5 is visible in the damaged
 * source; the other enumerators are reconstructed from the LTTng
 * statedump instrumentation — confirm against upstream.
 */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};

enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};

/* Scheduling status reported for each enumerated task. */
enum lttng_process_status {
	LTTNG_UNNAMED = 0,
	LTTNG_WAIT_FORK = 1,
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,
};
115 int lttng_enumerate_block_devices(struct lttng_session
*session
)
117 struct class_dev_iter iter
;
120 class_dev_iter_init(&iter
, gendisk_block_class(), NULL
,
121 gendisk_device_type());
122 while ((dev
= class_dev_iter_next(&iter
))) {
123 struct disk_part_iter piter
;
124 struct gendisk
*disk
= dev_to_disk(dev
);
125 struct hd_struct
*part
;
128 * Don't show empty devices or things that have been
131 if (get_capacity(disk
) == 0 ||
132 (disk
->flags
& GENHD_FL_SUPPRESS_PARTITION_INFO
))
135 disk_part_iter_init(&piter
, disk
, DISK_PITER_INCL_PART0
);
136 while ((part
= disk_part_iter_next(&piter
))) {
137 char name_buf
[BDEVNAME_SIZE
];
140 p
= gendisk_name(disk
, part
->partno
, name_buf
);
142 disk_part_iter_exit(&piter
);
143 class_dev_iter_exit(&iter
);
146 trace_lttng_statedump_block_device(session
,
147 part_devt(part
), name_buf
);
149 disk_part_iter_exit(&piter
);
151 class_dev_iter_exit(&iter
);
158 void lttng_enumerate_device(struct lttng_session
*session
,
159 struct net_device
*dev
)
161 struct in_device
*in_dev
;
162 struct in_ifaddr
*ifa
;
164 if (dev
->flags
& IFF_UP
) {
165 in_dev
= in_dev_get(dev
);
167 for (ifa
= in_dev
->ifa_list
; ifa
!= NULL
;
168 ifa
= ifa
->ifa_next
) {
169 trace_lttng_statedump_network_interface(
175 trace_lttng_statedump_network_interface(
181 int lttng_enumerate_network_ip_interface(struct lttng_session
*session
)
183 struct net_device
*dev
;
185 read_lock(&dev_base_lock
);
186 for_each_netdev(&init_net
, dev
)
187 lttng_enumerate_device(session
, dev
);
188 read_unlock(&dev_base_lock
);
192 #else /* CONFIG_INET */
194 int lttng_enumerate_network_ip_interface(struct lttng_session
*session
)
198 #endif /* CONFIG_INET */
201 int lttng_dump_one_fd(const void *p
, struct file
*file
, unsigned int fd
)
203 const struct lttng_fd_ctx
*ctx
= p
;
204 const char *s
= d_path(&file
->f_path
, ctx
->page
, PAGE_SIZE
);
205 unsigned int flags
= file
->f_flags
;
209 * We don't expose kernel internal flags, only userspace-visible
212 flags
&= ~FMODE_NONOTIFY
;
213 fdt
= files_fdtable(ctx
->files
);
215 * We need to check here again whether fd is within the fdt
216 * max_fds range, because we might be seeing a different
217 * files_fdtable() than iterate_fd(), assuming only RCU is
218 * protecting the read. In reality, iterate_fd() holds
219 * file_lock, which should ensure the fdt does not change while
220 * the lock is taken, but we are not aware whether this is
221 * guaranteed or not, so play safe.
223 if (fd
< fdt
->max_fds
&& close_on_exec(fd
, fdt
))
226 struct dentry
*dentry
= file
->f_path
.dentry
;
228 /* Make sure we give at least some info */
229 spin_lock(&dentry
->d_lock
);
230 trace_lttng_statedump_file_descriptor(ctx
->session
,
231 ctx
->files
, fd
, dentry
->d_name
.name
, flags
,
233 spin_unlock(&dentry
->d_lock
);
236 trace_lttng_statedump_file_descriptor(ctx
->session
,
237 ctx
->files
, fd
, s
, flags
, file
->f_mode
);
242 /* Called with task lock held. */
244 void lttng_enumerate_files(struct lttng_session
*session
,
245 struct files_struct
*files
,
248 struct lttng_fd_ctx ctx
= { .page
= tmp
, .session
= session
, .files
= files
, };
250 iterate_fd(files
, 0, lttng_dump_one_fd
, &ctx
);
#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
/*
 * Emit one lttng_statedump_cpu_topology event per possible CPU.
 * Always returns 0.
 */
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	int cpu;
	const cpumask_t *cpumask = cpu_possible_mask;

	for (cpu = cpumask_first(cpumask); cpu < nr_cpu_ids;
			cpu = cpumask_next(cpu, cpumask)) {
		trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu));
	}

	return 0;
}
#else
/* CPU topology statedump unavailable on this kernel/arch: no-op. */
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	return 0;
}
#endif
277 * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section
278 * (scheduling in atomic). Normally, the tasklist lock protects this kind of
279 * iteration, but it is not exported to modules.
282 void lttng_enumerate_task_vm_maps(struct lttng_session
*session
,
283 struct task_struct
*p
)
285 struct mm_struct
*mm
;
286 struct vm_area_struct
*map
;
289 /* get_task_mm does a task_lock... */
296 down_read(&mm
->mmap_sem
);
299 ino
= map
->vm_file
->f_path
.dentry
->d_inode
->i_ino
;
302 trace_lttng_statedump_vm_map(session
, p
, map
, ino
);
305 up_read(&mm
->mmap_sem
);
311 int lttng_enumerate_vm_maps(struct lttng_session
*session
)
313 struct task_struct
*p
;
317 lttng_enumerate_task_vm_maps(session
, p
);
324 int lttng_list_interrupts(struct lttng_session
*session
)
327 unsigned long flags
= 0;
328 struct irq_desc
*desc
;
331 for_each_irq_desc(irq
, desc
) {
332 struct irqaction
*action
;
333 const char *irq_chip_name
=
334 irq_desc_get_chip(desc
)->name
? : "unnamed_irq_chip";
336 local_irq_save(flags
);
337 raw_spin_lock(&desc
->lock
);
338 for (action
= desc
->action
; action
; action
= action
->next
) {
339 trace_lttng_statedump_interrupt(session
,
340 irq
, irq_chip_name
, action
);
342 raw_spin_unlock(&desc
->lock
);
343 local_irq_restore(flags
);
349 * Statedump the task's namespaces using the proc filesystem inode number as
350 * the unique identifier. The user and pid ns are nested and will be dumped
353 * Called with task lock held.
356 void lttng_statedump_process_ns(struct lttng_session
*session
,
357 struct task_struct
*p
,
358 enum lttng_thread_type type
,
359 enum lttng_execution_mode mode
,
360 enum lttng_execution_submode submode
,
361 enum lttng_process_status status
)
363 struct nsproxy
*proxy
;
364 struct pid_namespace
*pid_ns
;
365 struct user_namespace
*user_ns
;
368 * The pid and user namespaces are special, they are nested and
369 * accessed with specific functions instead of the nsproxy struct
370 * like the other namespaces.
372 pid_ns
= task_active_pid_ns(p
);
374 trace_lttng_statedump_process_pid_ns(session
, p
, pid_ns
);
375 pid_ns
= pid_ns
? pid_ns
->parent
: NULL
;
379 user_ns
= task_cred_xxx(p
, user_ns
);
381 trace_lttng_statedump_process_user_ns(session
, p
, user_ns
);
383 * trace_lttng_statedump_process_user_ns() internally
384 * checks whether user_ns is NULL. While this does not
385 * appear to be a possible return value for
386 * task_cred_xxx(), err on the safe side and check
387 * for NULL here as well to be consistent with the
388 * paranoid behavior of
389 * trace_lttng_statedump_process_user_ns().
391 user_ns
= user_ns
? user_ns
->parent
: NULL
;
395 * Back and forth on locking strategy within Linux upstream for nsproxy.
396 * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3
397 * "namespaces: Use task_lock and not rcu to protect nsproxy"
400 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
401 LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
402 LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
403 LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
407 proxy
= task_nsproxy(p
);
410 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
411 trace_lttng_statedump_process_cgroup_ns(session
, p
, proxy
->cgroup_ns
);
413 trace_lttng_statedump_process_ipc_ns(session
, p
, proxy
->ipc_ns
);
414 #ifndef LTTNG_MNT_NS_MISSING_HEADER
415 trace_lttng_statedump_process_mnt_ns(session
, p
, proxy
->mnt_ns
);
417 trace_lttng_statedump_process_net_ns(session
, p
, proxy
->net_ns
);
418 trace_lttng_statedump_process_uts_ns(session
, p
, proxy
->uts_ns
);
420 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
421 LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
422 LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
423 LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
431 int lttng_enumerate_process_states(struct lttng_session
*session
)
433 struct task_struct
*g
, *p
;
436 tmp
= (char *) __get_free_page(GFP_KERNEL
);
441 for_each_process(g
) {
442 struct files_struct
*prev_files
= NULL
;
446 enum lttng_execution_mode mode
=
448 enum lttng_execution_submode submode
=
450 enum lttng_process_status status
;
451 enum lttng_thread_type type
;
452 struct files_struct
*files
;
455 if (p
->exit_state
== EXIT_ZOMBIE
)
456 status
= LTTNG_ZOMBIE
;
457 else if (p
->exit_state
== EXIT_DEAD
)
459 else if (p
->state
== TASK_RUNNING
) {
460 /* Is this a forked child that has not run yet? */
461 if (list_empty(&p
->rt
.run_list
))
462 status
= LTTNG_WAIT_FORK
;
465 * All tasks are considered as wait_cpu;
466 * the viewer will sort out if the task
467 * was really running at this time.
469 status
= LTTNG_WAIT_CPU
;
470 } else if (p
->state
&
471 (TASK_INTERRUPTIBLE
| TASK_UNINTERRUPTIBLE
)) {
472 /* Task is waiting for something to complete */
475 status
= LTTNG_UNNAMED
;
476 submode
= LTTNG_NONE
;
479 * Verification of t->mm is to filter out kernel
480 * threads; Viewer will further filter out if a
481 * user-space thread was in syscall mode or not.
484 type
= LTTNG_USER_THREAD
;
486 type
= LTTNG_KERNEL_THREAD
;
489 trace_lttng_statedump_process_state(session
,
490 p
, type
, mode
, submode
, status
, files
);
491 lttng_statedump_process_ns(session
,
492 p
, type
, mode
, submode
, status
);
494 * As an optimisation for the common case, do not
495 * repeat information for the same files_struct in
496 * two consecutive threads. This is the common case
497 * for threads sharing the same fd table. RCU guarantees
498 * that the same files_struct pointer is not re-used
499 * throughout processes/threads iteration.
501 if (files
&& files
!= prev_files
) {
502 lttng_enumerate_files(session
, files
, tmp
);
506 } while_each_thread(g
, p
);
510 free_page((unsigned long) tmp
);
516 void lttng_statedump_work_func(struct work_struct
*work
)
518 if (atomic_dec_and_test(&kernel_threads_to_run
))
519 /* If we are the last thread, wake up do_lttng_statedump */
520 wake_up(&statedump_wq
);
524 int do_lttng_statedump(struct lttng_session
*session
)
528 trace_lttng_statedump_start(session
);
529 ret
= lttng_enumerate_process_states(session
);
534 * ret = lttng_enumerate_vm_maps(session);
538 ret
= lttng_list_interrupts(session
);
541 ret
= lttng_enumerate_network_ip_interface(session
);
544 ret
= lttng_enumerate_block_devices(session
);
549 printk(KERN_WARNING
"LTTng: block device enumeration is not supported by kernel\n");
554 ret
= lttng_enumerate_cpu_topology(session
);
558 /* TODO lttng_dump_idt_table(session); */
559 /* TODO lttng_dump_softirq_vec(session); */
560 /* TODO lttng_list_modules(session); */
561 /* TODO lttng_dump_swap_files(session); */
564 * Fire off a work queue on each CPU. Their sole purpose in life
565 * is to guarantee that each CPU has been in a state where is was in
566 * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ).
569 atomic_set(&kernel_threads_to_run
, num_online_cpus());
570 for_each_online_cpu(cpu
) {
571 INIT_DELAYED_WORK(&cpu_work
[cpu
], lttng_statedump_work_func
);
572 schedule_delayed_work_on(cpu
, &cpu_work
[cpu
], 0);
574 /* Wait for all threads to run */
575 __wait_event(statedump_wq
, (atomic_read(&kernel_threads_to_run
) == 0));
577 /* Our work is done */
578 trace_lttng_statedump_end(session
);
/*
 * Called with session mutex held.
 *
 * Public entry point: trigger a full state dump for @session.
 * Returns the result of do_lttng_statedump().
 */
int lttng_statedump_start(struct lttng_session *session)
{
	return do_lttng_statedump(session);
}
EXPORT_SYMBOL_GPL(lttng_statedump_start);
592 int __init
lttng_statedump_init(void)
597 module_init(lttng_statedump_init
);
600 void __exit
lttng_statedump_exit(void)
604 module_exit(lttng_statedump_exit
);
606 MODULE_LICENSE("GPL and additional rights");
607 MODULE_AUTHOR("Jean-Hugues Deschenes");
608 MODULE_DESCRIPTION("LTTng statedump provider");
609 MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION
) "."
610 __stringify(LTTNG_MODULES_MINOR_VERSION
) "."
611 __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION
)
612 LTTNG_MODULES_EXTRAVERSION
);