/*
 * Linux Trace Toolkit Kernel State Dump
 *
 * Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca>
 * Eric Clement: Add listing of network IP interface
 * 2006, 2007 Mathieu Desnoyers: Fix kernel threads
 *
 * Dual LGPL v2.1/GPL v2 license.
 */
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/netlink.h>
18 #include <linux/inet.h>
20 #include <linux/kthread.h>
21 #include <linux/proc_fs.h>
22 #include <linux/file.h>
23 #include <linux/interrupt.h>
24 #include <linux/irqnr.h>
25 #include <linux/cpu.h>
26 #include <linux/netdevice.h>
27 #include <linux/inetdevice.h>
28 #include <linux/sched.h>
30 #include <linux/marker.h>
31 #include <linux/fdtable.h>
32 #include <linux/swap.h>
33 #include <linux/wait.h>
34 #include <linux/mutex.h>
36 #include "ltt-tracer.h"
38 #ifdef CONFIG_GENERIC_HARDIRQS
39 #include <linux/irq.h>
42 #ifdef CONFIG_HAVE_KVM
46 #define NB_PROC_CHUNK 20
49 * Protected by the trace lock.
/* Per-CPU delayed work items scheduled by do_ltt_statedump() to force each
 * online CPU through a schedulable (syscall-mode) context. */
51 static struct delayed_work cpu_work
[NR_CPUS
];
/* do_ltt_statedump() sleeps on this queue until every per-CPU work item
 * has run (see ltt_statedump_work_func). */
52 static DECLARE_WAIT_QUEUE_HEAD(statedump_wq
);
/* Number of per-CPU work items still pending; the last one to finish
 * wakes statedump_wq. */
53 static atomic_t kernel_threads_to_run
;
/* No-op default target for the kprobes dump callback below. */
55 static void empty_cb(void *call_data
)
/* Serializes (un)registration and invocation of ltt_dump_kprobes_table_cb. */
59 static DEFINE_MUTEX(statedump_cb_mutex
);
/* Callback used to dump the kprobes table during the statedump; points to
 * empty_cb until a module registers a real implementation. */
60 static void (*ltt_dump_kprobes_table_cb
)(void *call_data
) = empty_cb
;
/* Thread classification reported in process_state events: user vs kernel. */
62 enum lttng_thread_type
{
63 LTTNG_USER_THREAD
= 0,
64 LTTNG_KERNEL_THREAD
= 1,
/* Execution mode of a task; only the UNKNOWN value is visible in this
 * chunk — remaining members are on lines missing from this extract. */
67 enum lttng_execution_mode
{
73 LTTNG_MODE_UNKNOWN
= 5,
/* Execution submode; LTTNG_NONE / LTTNG_UNKNOWN are used by
 * ltt_enumerate_process_states() below. */
76 enum lttng_execution_submode
{
/* Scheduling status dumped per task (LTTNG_WAIT_FORK, LTTNG_WAIT_CPU,
 * LTTNG_ZOMBIE, ... — see ltt_enumerate_process_states). */
81 enum lttng_process_status
{
/* Maps a numeric enum value (.id) to its symbolic name (.symbol) for
 * dumping enumeration tables into the trace. */
92 struct trace_enum_map
{
97 #ifdef CONFIG_HAVE_KVM
/*
 * VMX KVM exit reason -> symbolic name table, dumped to the trace by
 * ltt_dump_enum_tables() so viewers can decode kvm exit events.
 * The dump loop iterates while .symbol is non-NULL, so the table is
 * presumably NULL-terminated on lines missing from this extract.
 */
98 static const struct trace_enum_map vmx_kvm_exit_enum
[] = {
99 { EXIT_REASON_EXCEPTION_NMI
, "exception" },
100 { EXIT_REASON_EXTERNAL_INTERRUPT
, "ext_irq" },
101 { EXIT_REASON_TRIPLE_FAULT
, "triple_fault" },
102 { EXIT_REASON_PENDING_INTERRUPT
, "interrupt_window" },
103 { EXIT_REASON_NMI_WINDOW
, "nmi_window" },
104 { EXIT_REASON_TASK_SWITCH
, "task_switch" },
105 { EXIT_REASON_CPUID
, "cpuid" },
106 { EXIT_REASON_HLT
, "halt" },
107 { EXIT_REASON_INVLPG
, "invlpg" },
108 { EXIT_REASON_RDPMC
, "rdpmc" },
109 { EXIT_REASON_RDTSC
, "rdtsc" },
110 { EXIT_REASON_VMCALL
, "hypercall" },
111 { EXIT_REASON_VMCLEAR
, "vmclear" },
112 { EXIT_REASON_VMLAUNCH
, "vmlaunch" },
/* NOTE(review): "vmprtld" looks like a typo for "vmptrld"; this is a
 * trace-visible string — confirm no viewer depends on it before fixing. */
113 { EXIT_REASON_VMPTRLD
, "vmprtld" },
114 { EXIT_REASON_VMPTRST
, "vmptrst" },
115 { EXIT_REASON_VMREAD
, "vmread" },
116 { EXIT_REASON_VMRESUME
, "vmresume" },
117 { EXIT_REASON_VMWRITE
, "vmwrite" },
118 { EXIT_REASON_VMOFF
, "vmoff" },
119 { EXIT_REASON_VMON
, "vmon" },
120 { EXIT_REASON_CR_ACCESS
, "cr_access" },
121 { EXIT_REASON_DR_ACCESS
, "dr_access" },
122 { EXIT_REASON_IO_INSTRUCTION
, "io_instruction" },
123 { EXIT_REASON_MSR_READ
, "rdmsr" },
124 { EXIT_REASON_MSR_WRITE
, "wrmsr" },
125 { EXIT_REASON_MWAIT_INSTRUCTION
, "mwait_instruction" },
126 { EXIT_REASON_MONITOR_INSTRUCTION
, "monitor_instruction" },
127 { EXIT_REASON_PAUSE_INSTRUCTION
, "pause_instruction" },
128 { EXIT_REASON_MCE_DURING_VMENTRY
, "mce_during_vmentry" },
129 { EXIT_REASON_TPR_BELOW_THRESHOLD
, "tpr_below_thres" },
130 { EXIT_REASON_APIC_ACCESS
, "apic_access" },
131 { EXIT_REASON_EPT_VIOLATION
, "ept_violation" },
/* NOTE(review): "epg_misconfig" looks like a typo for "ept_misconfig";
 * trace-visible string — confirm before changing. */
132 { EXIT_REASON_EPT_MISCONFIG
, "epg_misconfig" },
133 { EXIT_REASON_WBINVD
, "wbinvd" },
136 #endif /* CONFIG_HAVE_KVM */
/*
 * Dump enum translation tables to the trace so viewers can map numeric
 * event fields back to symbolic names. One enum_tables event is emitted
 * per entry of each table; currently only the VMX KVM exit reasons.
 */
138 static void ltt_dump_enum_tables(struct ltt_probe_private_data
*call_data
)
140 #ifdef CONFIG_HAVE_KVM
142 /* KVM exit reasons for VMX */
/* Iterate until the NULL-symbol terminator entry. */
143 for(i
= 0; vmx_kvm_exit_enum
[i
].symbol
; i
++) {
144 __trace_mark(0, enum_tables
, vmx_kvm_exit
, call_data
,
145 "id %ld symbol %s", vmx_kvm_exit_enum
[i
].id
,
146 vmx_kvm_exit_enum
[i
].symbol
);
148 #endif /* CONFIG_HAVE_KVM */
/*
 * Dump the state of one network device. For an interface that is up,
 * emit one netif_state event per IPv4 address on it; the second
 * __trace_mark below presumably covers the down/addressless case —
 * the exact branch structure is on lines missing from this extract.
 */
152 static void ltt_enumerate_device(struct ltt_probe_private_data
*call_data
,
153 struct net_device
*dev
)
155 struct in_device
*in_dev
;
156 struct in_ifaddr
*ifa
;
158 if (dev
->flags
& IFF_UP
) {
/* Takes a reference on the inet device; released on a missing line. */
159 in_dev
= in_dev_get(dev
);
/* Walk the interface's IPv4 address list. */
161 for (ifa
= in_dev
->ifa_list
; ifa
!= NULL
;
163 __trace_mark(0, netif_state
,
164 network_ipv4_interface
,
166 "name %s address #n4u%lu up %d",
168 (unsigned long)ifa
->ifa_address
,
173 __trace_mark(0, netif_state
, network_ip_interface
,
174 call_data
, "name %s address #n4u%lu up %d",
/*
 * Walk every registered net device (init_net namespace) under
 * dev_base_lock and dump each one via ltt_enumerate_device().
 */
179 ltt_enumerate_network_ip_interface(struct ltt_probe_private_data
*call_data
)
181 struct net_device
*dev
;
183 read_lock(&dev_base_lock
);
184 for_each_netdev(&init_net
, dev
)
185 ltt_enumerate_device(call_data
, dev
);
186 read_unlock(&dev_base_lock
);
190 #else /* CONFIG_INET */
/* !CONFIG_INET stub: no IP interfaces to enumerate. */
192 ltt_enumerate_network_ip_interface(struct ltt_probe_private_data
*call_data
)
196 #endif /* CONFIG_INET */
/*
 * Emit one fd_state event per open file descriptor of task @t.
 * @tmp is a caller-supplied PAGE_SIZE scratch buffer used by d_path().
 * Runs under t->files->file_lock while walking the fd table.
 */
200 ltt_enumerate_task_fd(struct ltt_probe_private_data
*call_data
,
201 struct task_struct
*t
, char *tmp
)
206 const unsigned char *path
;
211 spin_lock(&t
->files
->file_lock
);
212 fdt
= files_fdtable(t
->files
);
213 for (i
= 0; i
< fdt
->max_fds
; i
++) {
/* NULL (skipped on a missing line, presumably) for unused slots. */
214 filp
= fcheck_files(t
->files
, i
);
217 path
= d_path(&filp
->f_path
, tmp
, PAGE_SIZE
);
218 /* Make sure we give at least some info */
/* If d_path() failed, fall back to the dentry's short name. */
219 __trace_mark(0, fd_state
, file_descriptor
, call_data
,
220 "filename %s pid %d fd %u",
221 (IS_ERR(path
))?(filp
->f_dentry
->d_name
.name
):(path
),
224 spin_unlock(&t
->files
->file_lock
);
/*
 * Walk every task in the system (circular list starting and ending at
 * init_task) and dump its open file descriptors. Each task is pinned
 * via its ->usage refcount so tasklist_lock can be dropped while
 * dumping; the task-advance step is on lines missing from this extract.
 */
228 ltt_enumerate_file_descriptors(struct ltt_probe_private_data
*call_data
)
230 struct task_struct
*t
= &init_task
;
/* One page of scratch space for d_path() in ltt_enumerate_task_fd(). */
231 char *tmp
= (char *)__get_free_page(GFP_KERNEL
);
233 /* Enumerate active file descriptors */
235 read_lock(&tasklist_lock
);
/* Drop the reference taken on the previous iteration's task... */
237 atomic_dec(&t
->usage
);
/* ...and pin the current one before releasing tasklist_lock. */
239 atomic_inc(&t
->usage
);
240 read_unlock(&tasklist_lock
);
242 ltt_enumerate_task_fd(call_data
, t
, tmp
);
244 } while (t
!= &init_task
);
245 free_page((unsigned long)tmp
);
/*
 * Emit one vm_state event per VMA of task @t: start/end addresses,
 * flags, file offset, and backing inode (0 for anonymous mappings,
 * presumably — the vm_file NULL check is on a missing line).
 * Walks the VMA list under mm->mmap_sem held for read.
 */
250 ltt_enumerate_task_vm_maps(struct ltt_probe_private_data
*call_data
,
251 struct task_struct
*t
)
253 struct mm_struct
*mm
;
254 struct vm_area_struct
*map
;
257 /* get_task_mm does a task_lock... */
264 down_read(&mm
->mmap_sem
);
/* Inode number of the file backing this mapping. */
267 ino
= map
->vm_file
->f_dentry
->d_inode
->i_ino
;
270 __trace_mark(0, vm_state
, vm_map
, call_data
,
271 "pid %d start %lu end %lu flags %lu "
272 "pgoff %lu inode %lu",
273 t
->pid
, map
->vm_start
, map
->vm_end
,
274 map
->vm_flags
, map
->vm_pgoff
<< PAGE_SHIFT
,
278 up_read(&mm
->mmap_sem
);
/*
 * Walk every task (same pinned-iteration pattern as
 * ltt_enumerate_file_descriptors) and dump its VM maps via
 * ltt_enumerate_task_vm_maps(). The task-advance step is on lines
 * missing from this extract.
 */
284 ltt_enumerate_vm_maps(struct ltt_probe_private_data
*call_data
)
286 struct task_struct
*t
= &init_task
;
289 read_lock(&tasklist_lock
);
/* Release previous task's reference, pin the current one. */
291 atomic_dec(&t
->usage
);
293 atomic_inc(&t
->usage
);
294 read_unlock(&tasklist_lock
);
295 ltt_enumerate_task_vm_maps(call_data
, t
);
296 } while (t
!= &init_task
);
300 #ifdef CONFIG_GENERIC_HARDIRQS
/*
 * Emit one irq_state event per registered irqaction of every IRQ
 * descriptor: chip name, handler name and IRQ number. Each descriptor's
 * action list is walked under desc->lock with local IRQs disabled.
 */
301 static inline void list_interrupts(struct ltt_probe_private_data
*call_data
)
304 unsigned long flags
= 0;
305 struct irq_desc
*desc
;
308 for_each_irq_desc(irq
, desc
) {
309 struct irqaction
*action
;
/* GCC ?: extension — substitute a placeholder for unnamed chips. */
310 const char *irq_chip_name
=
311 desc
->chip
->name
? : "unnamed_irq_chip";
313 local_irq_save(flags
);
314 raw_spin_lock(&desc
->lock
);
315 for (action
= desc
->action
; action
; action
= action
->next
)
316 __trace_mark(0, irq_state
, interrupt
, call_data
,
317 "name %s action %s irq_id %u",
318 irq_chip_name
, action
->name
, irq
);
319 raw_spin_unlock(&desc
->lock
);
320 local_irq_restore(flags
);
/* !CONFIG_GENERIC_HARDIRQS stub: nothing to list. */
324 static inline void list_interrupts(struct ltt_probe_private_data
*call_data
)
/*
 * Emit one task_state event per thread in the system, reporting pid,
 * parent pid, command name, thread type (user/kernel), execution
 * mode/submode and scheduling status. Iterates the circular task list
 * starting and ending at init_task, pinning each task via ->usage;
 * several branches (DEAD status, the thread-iteration inner loop) are
 * on lines missing from this extract.
 */
330 ltt_enumerate_process_states(struct ltt_probe_private_data
*call_data
)
332 struct task_struct
*t
= &init_task
;
333 struct task_struct
*p
= t
;
334 enum lttng_process_status status
;
335 enum lttng_thread_type type
;
336 enum lttng_execution_mode mode
;
337 enum lttng_execution_submode submode
;
/* The statedump cannot know the true mode/submode of a task it did not
 * observe entering the kernel; report them as unknown. */
340 mode
= LTTNG_MODE_UNKNOWN
;
341 submode
= LTTNG_UNKNOWN
;
343 read_lock(&tasklist_lock
);
/* Drop the reference taken on the previous iteration (none the first
 * time through, when t is still init_task). */
344 if (t
!= &init_task
) {
345 atomic_dec(&t
->usage
);
/* Pin the current task so tasklist_lock can be released while dumping. */
352 atomic_inc(&t
->usage
);
353 read_unlock(&tasklist_lock
);
357 if (t
->exit_state
== EXIT_ZOMBIE
)
358 status
= LTTNG_ZOMBIE
;
359 else if (t
->exit_state
== EXIT_DEAD
)
361 else if (t
->state
== TASK_RUNNING
) {
362 /* Is this a forked child that has not run yet? */
363 if (list_empty(&t
->rt
.run_list
))
364 status
= LTTNG_WAIT_FORK
;
367 * All tasks are considered as wait_cpu;
368 * the viewer will sort out if the task was
369 * really running at this time.
371 status
= LTTNG_WAIT_CPU
;
372 } else if (t
->state
&
373 (TASK_INTERRUPTIBLE
| TASK_UNINTERRUPTIBLE
)) {
374 /* Task is waiting for something to complete */
/* Fallback for states not matched above. */
377 status
= LTTNG_UNNAMED
;
378 submode
= LTTNG_NONE
;
381 * Verification of t->mm is to filter out kernel threads;
382 * Viewer will further filter out if a user-space thread was
383 * in syscall mode or not.
386 type
= LTTNG_USER_THREAD
;
388 type
= LTTNG_KERNEL_THREAD
;
390 __trace_mark(0, task_state
, process_state
, call_data
,
391 "pid %d parent_pid %d name %s type %d mode %d "
392 "submode %d status %d tgid %d",
393 t
->pid
, t
->parent
->pid
, t
->comm
,
394 type
, mode
, submode
, status
, t
->tgid
);
396 } while (t
!= &init_task
);
401 void ltt_statedump_register_kprobes_dump(void (*callback
)(void *call_data
))
403 mutex_lock(&statedump_cb_mutex
);
404 ltt_dump_kprobes_table_cb
= callback
;
405 mutex_unlock(&statedump_cb_mutex
);
407 EXPORT_SYMBOL_GPL(ltt_statedump_register_kprobes_dump
);
409 void ltt_statedump_unregister_kprobes_dump(void (*callback
)(void *call_data
))
411 mutex_lock(&statedump_cb_mutex
);
412 ltt_dump_kprobes_table_cb
= empty_cb
;
413 mutex_unlock(&statedump_cb_mutex
);
415 EXPORT_SYMBOL_GPL(ltt_statedump_unregister_kprobes_dump
);
417 void ltt_statedump_work_func(struct work_struct
*work
)
419 if (atomic_dec_and_test(&kernel_threads_to_run
))
420 /* If we are the last thread, wake up do_ltt_statedump */
421 wake_up(&statedump_wq
);
424 static int do_ltt_statedump(struct ltt_probe_private_data
*call_data
)
427 struct module
*cb_owner
;
429 printk(KERN_DEBUG
"LTT state dump thread start\n");
430 ltt_enumerate_process_states(call_data
);
431 ltt_enumerate_file_descriptors(call_data
);
432 list_modules(call_data
);
433 ltt_enumerate_vm_maps(call_data
);
434 list_interrupts(call_data
);
435 ltt_enumerate_network_ip_interface(call_data
);
436 ltt_dump_swap_files(call_data
);
437 ltt_dump_sys_call_table(call_data
);
438 ltt_dump_softirq_vec(call_data
);
439 ltt_dump_idt_table(call_data
);
440 ltt_dump_enum_tables(call_data
);
442 mutex_lock(&statedump_cb_mutex
);
444 cb_owner
= __module_address((unsigned long)ltt_dump_kprobes_table_cb
);
445 __module_get(cb_owner
);
446 ltt_dump_kprobes_table_cb(call_data
);
447 module_put(cb_owner
);
449 mutex_unlock(&statedump_cb_mutex
);
452 * Fire off a work queue on each CPU. Their sole purpose in life
453 * is to guarantee that each CPU has been in a state where is was in
454 * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ).
457 atomic_set(&kernel_threads_to_run
, num_online_cpus());
458 for_each_online_cpu(cpu
) {
459 INIT_DELAYED_WORK(&cpu_work
[cpu
], ltt_statedump_work_func
);
460 schedule_delayed_work_on(cpu
, &cpu_work
[cpu
], 0);
462 /* Wait for all threads to run */
463 __wait_event(statedump_wq
, (atomic_read(&kernel_threads_to_run
) != 0));
465 /* Our work is done */
466 printk(KERN_DEBUG
"LTT state dump end\n");
467 __trace_mark(0, global_state
, statedump_end
,
468 call_data
, MARK_NOARGS
);
473 * Called with trace lock held.
475 int ltt_statedump_start(struct ltt_trace
*trace
)
477 struct ltt_probe_private_data call_data
;
478 printk(KERN_DEBUG
"LTT state dump begin\n");
480 call_data
.trace
= trace
;
481 call_data
.serializer
= NULL
;
482 return do_ltt_statedump(&call_data
);
485 static int __init
statedump_init(void)
488 printk(KERN_DEBUG
"LTT : State dump init\n");
489 ret
= ltt_module_register(LTT_FUNCTION_STATEDUMP
,
490 ltt_statedump_start
, THIS_MODULE
);
494 static void __exit
statedump_exit(void)
496 printk(KERN_DEBUG
"LTT : State dump exit\n");
497 ltt_module_unregister(LTT_FUNCTION_STATEDUMP
);
/* Module entry/exit hooks and metadata. */
500 module_init(statedump_init
)
501 module_exit(statedump_exit
)
503 MODULE_LICENSE("GPL and additional rights");
504 MODULE_AUTHOR("Jean-Hugues Deschenes");
505 MODULE_DESCRIPTION("Linux Trace Toolkit Statedump");