Commit | Line | Data |
---|---|---|
1c8284eb MD |
1 | /* |
2 | * Linux Trace Toolkit Kernel State Dump | |
3 | * | |
4 | * Copyright 2005 - | |
5 | * Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca> | |
6 | * | |
7 | * Changes: | |
8 | * Eric Clement: Add listing of network IP interface | |
9 | * 2006, 2007 Mathieu Desnoyers Fix kernel threads | |
10 | * Various updates | |
11 | * | |
12 | * Dual LGPL v2.1/GPL v2 license. | |
13 | */ | |
14 | ||
15 | #include <linux/init.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/netlink.h> | |
18 | #include <linux/inet.h> | |
19 | #include <linux/ip.h> | |
20 | #include <linux/kthread.h> | |
21 | #include <linux/proc_fs.h> | |
22 | #include <linux/file.h> | |
23 | #include <linux/interrupt.h> | |
24 | #include <linux/irqnr.h> | |
25 | #include <linux/cpu.h> | |
26 | #include <linux/netdevice.h> | |
27 | #include <linux/inetdevice.h> | |
28 | #include <linux/sched.h> | |
29 | #include <linux/mm.h> | |
30 | #include <linux/marker.h> | |
31 | #include <linux/fdtable.h> | |
32 | #include <linux/swap.h> | |
33 | #include <linux/wait.h> | |
34 | #include <linux/mutex.h> | |
35 | ||
36 | #include "ltt-tracer.h" | |
37 | ||
38 | #ifdef CONFIG_GENERIC_HARDIRQS | |
39 | #include <linux/irq.h> | |
40 | #endif | |
41 | ||
d4697afa MD |
42 | #ifdef CONFIG_HAVE_KVM |
43 | #include <asm/vmx.h> | |
44 | #endif | |
45 | ||
1c8284eb MD |
/*
 * NOTE(review): NB_PROC_CHUNK is not referenced anywhere else in this file
 * as shown -- confirm whether it is still needed.
 */
#define NB_PROC_CHUNK 20

/*
 * Protected by the trace lock.
 *
 * cpu_work:              per-CPU delayed work items (re)initialised and
 *                        queued by do_ltt_statedump().
 * statedump_wq:          wait queue do_ltt_statedump() sleeps on; woken by
 *                        ltt_statedump_work_func().
 * kernel_threads_to_run: number of queued work items that have not run yet.
 */
static struct delayed_work cpu_work[NR_CPUS];
static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);
static atomic_t kernel_threads_to_run;
54 | ||
/*
 * No-op kprobes-table dump callback. Installed as the default value of
 * ltt_dump_kprobes_table_cb so the state dump can always call through the
 * pointer without a NULL check.
 */
static void empty_cb(void *call_data)
{
}
58 | ||
/*
 * statedump_cb_mutex serialises updates of ltt_dump_kprobes_table_cb
 * (register/unregister) against its invocation in do_ltt_statedump().
 */
static DEFINE_MUTEX(statedump_cb_mutex);
/* Callback that dumps the kprobes table; defaults to the no-op empty_cb. */
static void (*ltt_dump_kprobes_table_cb)(void *call_data) = empty_cb;
61 | ||
/*
 * Thread type recorded in the "type" field of the process_state event.
 * The numeric values are written to the trace as-is.
 */
enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,		/* task has an mm */
	LTTNG_KERNEL_THREAD = 1,	/* task has no mm */
};
66 | ||
/*
 * Execution mode recorded in the "mode" field of the process_state event.
 * This file only ever emits LTTNG_MODE_UNKNOWN; the other values are not
 * used here.
 */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};
75 | ||
/*
 * Execution submode recorded in the "submode" field of the process_state
 * event. The value actually emitted by this file is LTTNG_NONE.
 */
enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};
80 | ||
/*
 * Process status recorded in the "status" field of the process_state event.
 * The per-value notes describe how ltt_enumerate_process_states() picks
 * them; LTTNG_EXIT and LTTNG_RUN are not emitted by this file.
 */
enum lttng_process_status {
	LTTNG_UNNAMED = 0,	/* none of the cases below matched */
	LTTNG_WAIT_FORK = 1,	/* TASK_RUNNING but rt.run_list is empty */
	LTTNG_WAIT_CPU = 2,	/* TASK_RUNNING and on a run list */
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,	/* exit_state == EXIT_ZOMBIE */
	LTTNG_WAIT = 5,		/* TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE */
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,		/* exit_state == EXIT_DEAD */
};
91 | ||
d4697afa MD |
/*
 * One entry of a numeric-id to symbolic-name table dumped into the trace.
 * Tables are terminated by an entry whose symbol is NULL.
 */
struct trace_enum_map {
	long id;		/* numeric value of the enumerator */
	const char *symbol;	/* human-readable name, NULL in the terminator */
};
96 | ||
#ifdef CONFIG_HAVE_KVM
/*
 * VMX exit reason -> symbolic name table, dumped by ltt_dump_enum_tables()
 * so that a trace consumer can decode numeric KVM exit reasons.
 * Terminated by an entry with a NULL symbol.
 */
static const struct trace_enum_map vmx_kvm_exit_enum[] = {
	{ EXIT_REASON_EXCEPTION_NMI,		"exception" },
	{ EXIT_REASON_EXTERNAL_INTERRUPT,	"ext_irq" },
	{ EXIT_REASON_TRIPLE_FAULT,		"triple_fault" },
	{ EXIT_REASON_PENDING_INTERRUPT,	"interrupt_window" },
	{ EXIT_REASON_NMI_WINDOW,		"nmi_window" },
	{ EXIT_REASON_TASK_SWITCH,		"task_switch" },
	{ EXIT_REASON_CPUID,			"cpuid" },
	{ EXIT_REASON_HLT,			"halt" },
	{ EXIT_REASON_INVLPG,			"invlpg" },
	{ EXIT_REASON_RDPMC,			"rdpmc" },
	{ EXIT_REASON_RDTSC,			"rdtsc" },
	{ EXIT_REASON_VMCALL,			"hypercall" },
	{ EXIT_REASON_VMCLEAR,			"vmclear" },
	{ EXIT_REASON_VMLAUNCH,			"vmlaunch" },
	{ EXIT_REASON_VMPTRLD,			"vmptrld" },
	{ EXIT_REASON_VMPTRST,			"vmptrst" },
	{ EXIT_REASON_VMREAD,			"vmread" },
	{ EXIT_REASON_VMRESUME,			"vmresume" },
	{ EXIT_REASON_VMWRITE,			"vmwrite" },
	{ EXIT_REASON_VMOFF,			"vmoff" },
	{ EXIT_REASON_VMON,			"vmon" },
	{ EXIT_REASON_CR_ACCESS,		"cr_access" },
	{ EXIT_REASON_DR_ACCESS,		"dr_access" },
	{ EXIT_REASON_IO_INSTRUCTION,		"io_instruction" },
	{ EXIT_REASON_MSR_READ,			"rdmsr" },
	{ EXIT_REASON_MSR_WRITE,		"wrmsr" },
	{ EXIT_REASON_MWAIT_INSTRUCTION,	"mwait_instruction" },
	{ EXIT_REASON_MONITOR_INSTRUCTION,	"monitor_instruction" },
	{ EXIT_REASON_PAUSE_INSTRUCTION,	"pause_instruction" },
	{ EXIT_REASON_MCE_DURING_VMENTRY,	"mce_during_vmentry" },
	{ EXIT_REASON_TPR_BELOW_THRESHOLD,	"tpr_below_thres" },
	{ EXIT_REASON_APIC_ACCESS,		"apic_access" },
	{ EXIT_REASON_EPT_VIOLATION,		"ept_violation" },
	{ EXIT_REASON_EPT_MISCONFIG,		"ept_misconfig" },
	{ EXIT_REASON_WBINVD,			"wbinvd" },
	{ -1, NULL }	/* terminator: iteration stops on the NULL symbol */
};
#endif /* CONFIG_HAVE_KVM */
137 | ||
/*
 * ltt_dump_enum_tables - dump id/symbol enumeration tables into the trace.
 * @call_data: probe private data forwarded to each marker.
 *
 * Currently only the VMX exit-reason table is dumped, and only when
 * CONFIG_HAVE_KVM is set; otherwise this is a no-op.
 */
static void ltt_dump_enum_tables(struct ltt_probe_private_data *call_data)
{
#ifdef CONFIG_HAVE_KVM
	const struct trace_enum_map *entry;

	/* KVM exit reasons for VMX; the table ends at the NULL symbol. */
	for (entry = vmx_kvm_exit_enum; entry->symbol; entry++) {
		__trace_mark(0, enum_tables, vmx_kvm_exit, call_data,
			     "id %ld symbol %s", entry->id, entry->symbol);
	}
#endif /* CONFIG_HAVE_KVM */
}
150 | ||
1c8284eb MD |
151 | #ifdef CONFIG_INET |
152 | static void ltt_enumerate_device(struct ltt_probe_private_data *call_data, | |
153 | struct net_device *dev) | |
154 | { | |
155 | struct in_device *in_dev; | |
156 | struct in_ifaddr *ifa; | |
157 | ||
158 | if (dev->flags & IFF_UP) { | |
159 | in_dev = in_dev_get(dev); | |
160 | if (in_dev) { | |
161 | for (ifa = in_dev->ifa_list; ifa != NULL; | |
162 | ifa = ifa->ifa_next) | |
163 | __trace_mark(0, netif_state, | |
164 | network_ipv4_interface, | |
165 | call_data, | |
166 | "name %s address #n4u%lu up %d", | |
167 | dev->name, | |
168 | (unsigned long)ifa->ifa_address, | |
169 | 0); | |
170 | in_dev_put(in_dev); | |
171 | } | |
172 | } else | |
173 | __trace_mark(0, netif_state, network_ip_interface, | |
174 | call_data, "name %s address #n4u%lu up %d", | |
175 | dev->name, 0UL, 0); | |
176 | } | |
177 | ||
178 | static inline int | |
179 | ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data) | |
180 | { | |
181 | struct net_device *dev; | |
182 | ||
183 | read_lock(&dev_base_lock); | |
184 | for_each_netdev(&init_net, dev) | |
185 | ltt_enumerate_device(call_data, dev); | |
186 | read_unlock(&dev_base_lock); | |
187 | ||
188 | return 0; | |
189 | } | |
190 | #else /* CONFIG_INET */ | |
/*
 * Stub used when CONFIG_INET is not set: there are no IP interfaces to
 * enumerate, so nothing is traced. Always returns 0.
 */
static inline int
ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data)
{
	return 0;
}
196 | #endif /* CONFIG_INET */ | |
197 | ||
198 | ||
199 | static inline void | |
200 | ltt_enumerate_task_fd(struct ltt_probe_private_data *call_data, | |
201 | struct task_struct *t, char *tmp) | |
202 | { | |
203 | struct fdtable *fdt; | |
204 | struct file *filp; | |
205 | unsigned int i; | |
206 | const unsigned char *path; | |
207 | ||
208 | if (!t->files) | |
209 | return; | |
210 | ||
211 | spin_lock(&t->files->file_lock); | |
212 | fdt = files_fdtable(t->files); | |
213 | for (i = 0; i < fdt->max_fds; i++) { | |
214 | filp = fcheck_files(t->files, i); | |
215 | if (!filp) | |
216 | continue; | |
217 | path = d_path(&filp->f_path, tmp, PAGE_SIZE); | |
218 | /* Make sure we give at least some info */ | |
219 | __trace_mark(0, fd_state, file_descriptor, call_data, | |
220 | "filename %s pid %d fd %u", | |
221 | (IS_ERR(path))?(filp->f_dentry->d_name.name):(path), | |
222 | t->pid, i); | |
223 | } | |
224 | spin_unlock(&t->files->file_lock); | |
225 | } | |
226 | ||
227 | static inline int | |
228 | ltt_enumerate_file_descriptors(struct ltt_probe_private_data *call_data) | |
229 | { | |
230 | struct task_struct *t = &init_task; | |
231 | char *tmp = (char *)__get_free_page(GFP_KERNEL); | |
232 | ||
233 | /* Enumerate active file descriptors */ | |
234 | do { | |
235 | read_lock(&tasklist_lock); | |
236 | if (t != &init_task) | |
237 | atomic_dec(&t->usage); | |
238 | t = next_task(t); | |
239 | atomic_inc(&t->usage); | |
240 | read_unlock(&tasklist_lock); | |
241 | task_lock(t); | |
242 | ltt_enumerate_task_fd(call_data, t, tmp); | |
243 | task_unlock(t); | |
244 | } while (t != &init_task); | |
245 | free_page((unsigned long)tmp); | |
246 | return 0; | |
247 | } | |
248 | ||
249 | static inline void | |
250 | ltt_enumerate_task_vm_maps(struct ltt_probe_private_data *call_data, | |
251 | struct task_struct *t) | |
252 | { | |
253 | struct mm_struct *mm; | |
254 | struct vm_area_struct *map; | |
255 | unsigned long ino; | |
256 | ||
257 | /* get_task_mm does a task_lock... */ | |
258 | mm = get_task_mm(t); | |
259 | if (!mm) | |
260 | return; | |
261 | ||
262 | map = mm->mmap; | |
263 | if (map) { | |
264 | down_read(&mm->mmap_sem); | |
265 | while (map) { | |
266 | if (map->vm_file) | |
267 | ino = map->vm_file->f_dentry->d_inode->i_ino; | |
268 | else | |
269 | ino = 0; | |
270 | __trace_mark(0, vm_state, vm_map, call_data, | |
271 | "pid %d start %lu end %lu flags %lu " | |
272 | "pgoff %lu inode %lu", | |
273 | t->pid, map->vm_start, map->vm_end, | |
274 | map->vm_flags, map->vm_pgoff << PAGE_SHIFT, | |
275 | ino); | |
276 | map = map->vm_next; | |
277 | } | |
278 | up_read(&mm->mmap_sem); | |
279 | } | |
280 | mmput(mm); | |
281 | } | |
282 | ||
283 | static inline int | |
284 | ltt_enumerate_vm_maps(struct ltt_probe_private_data *call_data) | |
285 | { | |
286 | struct task_struct *t = &init_task; | |
287 | ||
288 | do { | |
289 | read_lock(&tasklist_lock); | |
290 | if (t != &init_task) | |
291 | atomic_dec(&t->usage); | |
292 | t = next_task(t); | |
293 | atomic_inc(&t->usage); | |
294 | read_unlock(&tasklist_lock); | |
295 | ltt_enumerate_task_vm_maps(call_data, t); | |
296 | } while (t != &init_task); | |
297 | return 0; | |
298 | } | |
299 | ||
300 | #ifdef CONFIG_GENERIC_HARDIRQS | |
301 | static inline void list_interrupts(struct ltt_probe_private_data *call_data) | |
302 | { | |
303 | unsigned int irq; | |
304 | unsigned long flags = 0; | |
305 | struct irq_desc *desc; | |
306 | ||
307 | /* needs irq_desc */ | |
308 | for_each_irq_desc(irq, desc) { | |
309 | struct irqaction *action; | |
310 | const char *irq_chip_name = | |
311 | desc->chip->name ? : "unnamed_irq_chip"; | |
312 | ||
313 | local_irq_save(flags); | |
314 | raw_spin_lock(&desc->lock); | |
315 | for (action = desc->action; action; action = action->next) | |
316 | __trace_mark(0, irq_state, interrupt, call_data, | |
317 | "name %s action %s irq_id %u", | |
318 | irq_chip_name, action->name, irq); | |
319 | raw_spin_unlock(&desc->lock); | |
320 | local_irq_restore(flags); | |
321 | } | |
322 | } | |
323 | #else | |
/*
 * Stub used when CONFIG_GENERIC_HARDIRQS is not set: no interrupt
 * information is dumped.
 */
static inline void list_interrupts(struct ltt_probe_private_data *call_data)
{
}
327 | #endif | |
328 | ||
329 | static inline int | |
330 | ltt_enumerate_process_states(struct ltt_probe_private_data *call_data) | |
331 | { | |
332 | struct task_struct *t = &init_task; | |
333 | struct task_struct *p = t; | |
334 | enum lttng_process_status status; | |
335 | enum lttng_thread_type type; | |
336 | enum lttng_execution_mode mode; | |
337 | enum lttng_execution_submode submode; | |
338 | ||
339 | do { | |
340 | mode = LTTNG_MODE_UNKNOWN; | |
341 | submode = LTTNG_UNKNOWN; | |
342 | ||
343 | read_lock(&tasklist_lock); | |
344 | if (t != &init_task) { | |
345 | atomic_dec(&t->usage); | |
346 | t = next_thread(t); | |
347 | } | |
348 | if (t == p) { | |
349 | p = next_task(t); | |
350 | t = p; | |
351 | } | |
352 | atomic_inc(&t->usage); | |
353 | read_unlock(&tasklist_lock); | |
354 | ||
355 | task_lock(t); | |
356 | ||
357 | if (t->exit_state == EXIT_ZOMBIE) | |
358 | status = LTTNG_ZOMBIE; | |
359 | else if (t->exit_state == EXIT_DEAD) | |
360 | status = LTTNG_DEAD; | |
361 | else if (t->state == TASK_RUNNING) { | |
362 | /* Is this a forked child that has not run yet? */ | |
363 | if (list_empty(&t->rt.run_list)) | |
364 | status = LTTNG_WAIT_FORK; | |
365 | else | |
366 | /* | |
367 | * All tasks are considered as wait_cpu; | |
368 | * the viewer will sort out if the task was | |
369 | * really running at this time. | |
370 | */ | |
371 | status = LTTNG_WAIT_CPU; | |
372 | } else if (t->state & | |
373 | (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { | |
374 | /* Task is waiting for something to complete */ | |
375 | status = LTTNG_WAIT; | |
376 | } else | |
377 | status = LTTNG_UNNAMED; | |
378 | submode = LTTNG_NONE; | |
379 | ||
380 | /* | |
381 | * Verification of t->mm is to filter out kernel threads; | |
382 | * Viewer will further filter out if a user-space thread was | |
383 | * in syscall mode or not. | |
384 | */ | |
385 | if (t->mm) | |
386 | type = LTTNG_USER_THREAD; | |
387 | else | |
388 | type = LTTNG_KERNEL_THREAD; | |
389 | ||
390 | __trace_mark(0, task_state, process_state, call_data, | |
391 | "pid %d parent_pid %d name %s type %d mode %d " | |
392 | "submode %d status %d tgid %d", | |
393 | t->pid, t->parent->pid, t->comm, | |
394 | type, mode, submode, status, t->tgid); | |
395 | task_unlock(t); | |
396 | } while (t != &init_task); | |
397 | ||
398 | return 0; | |
399 | } | |
400 | ||
401 | void ltt_statedump_register_kprobes_dump(void (*callback)(void *call_data)) | |
402 | { | |
403 | mutex_lock(&statedump_cb_mutex); | |
404 | ltt_dump_kprobes_table_cb = callback; | |
405 | mutex_unlock(&statedump_cb_mutex); | |
406 | } | |
407 | EXPORT_SYMBOL_GPL(ltt_statedump_register_kprobes_dump); | |
408 | ||
/*
 * ltt_statedump_unregister_kprobes_dump - remove the kprobes-table dumper.
 * @callback: unused; the currently installed callback is reset to the
 *            no-op default regardless of its value.
 *
 * Taking statedump_cb_mutex means this cannot return while
 * do_ltt_statedump() is in the middle of invoking the callback.
 */
void ltt_statedump_unregister_kprobes_dump(void (*callback)(void *call_data))
{
	mutex_lock(&statedump_cb_mutex);
	ltt_dump_kprobes_table_cb = empty_cb;
	mutex_unlock(&statedump_cb_mutex);
}
EXPORT_SYMBOL_GPL(ltt_statedump_unregister_kprobes_dump);
416 | ||
417 | void ltt_statedump_work_func(struct work_struct *work) | |
418 | { | |
419 | if (atomic_dec_and_test(&kernel_threads_to_run)) | |
420 | /* If we are the last thread, wake up do_ltt_statedump */ | |
421 | wake_up(&statedump_wq); | |
422 | } | |
423 | ||
424 | static int do_ltt_statedump(struct ltt_probe_private_data *call_data) | |
425 | { | |
426 | int cpu; | |
427 | struct module *cb_owner; | |
428 | ||
429 | printk(KERN_DEBUG "LTT state dump thread start\n"); | |
430 | ltt_enumerate_process_states(call_data); | |
431 | ltt_enumerate_file_descriptors(call_data); | |
432 | list_modules(call_data); | |
433 | ltt_enumerate_vm_maps(call_data); | |
434 | list_interrupts(call_data); | |
435 | ltt_enumerate_network_ip_interface(call_data); | |
436 | ltt_dump_swap_files(call_data); | |
437 | ltt_dump_sys_call_table(call_data); | |
438 | ltt_dump_softirq_vec(call_data); | |
439 | ltt_dump_idt_table(call_data); | |
d4697afa | 440 | ltt_dump_enum_tables(call_data); |
1c8284eb MD |
441 | |
442 | mutex_lock(&statedump_cb_mutex); | |
443 | ||
444 | cb_owner = __module_address((unsigned long)ltt_dump_kprobes_table_cb); | |
445 | __module_get(cb_owner); | |
446 | ltt_dump_kprobes_table_cb(call_data); | |
447 | module_put(cb_owner); | |
448 | ||
449 | mutex_unlock(&statedump_cb_mutex); | |
450 | ||
451 | /* | |
452 | * Fire off a work queue on each CPU. Their sole purpose in life | |
453 | * is to guarantee that each CPU has been in a state where is was in | |
454 | * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). | |
455 | */ | |
456 | get_online_cpus(); | |
457 | atomic_set(&kernel_threads_to_run, num_online_cpus()); | |
458 | for_each_online_cpu(cpu) { | |
459 | INIT_DELAYED_WORK(&cpu_work[cpu], ltt_statedump_work_func); | |
460 | schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); | |
461 | } | |
462 | /* Wait for all threads to run */ | |
463 | __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) != 0)); | |
464 | put_online_cpus(); | |
465 | /* Our work is done */ | |
466 | printk(KERN_DEBUG "LTT state dump end\n"); | |
467 | __trace_mark(0, global_state, statedump_end, | |
468 | call_data, MARK_NOARGS); | |
469 | return 0; | |
470 | } | |
471 | ||
/*
 * ltt_statedump_start - run a state dump for the given trace.
 * @trace: trace that receives the state dump events.
 *
 * Called with trace lock held.
 *
 * Builds the probe private data on the stack (target trace set, no
 * serializer override) and runs do_ltt_statedump() synchronously.
 * Returns whatever do_ltt_statedump() returns.
 *
 * NOTE(review): only the trace and serializer members are initialised; if
 * struct ltt_probe_private_data has other members they are left
 * indeterminate -- confirm against ltt-tracer.h.
 */
int ltt_statedump_start(struct ltt_trace *trace)
{
	struct ltt_probe_private_data call_data;
	printk(KERN_DEBUG "LTT state dump begin\n");

	call_data.trace = trace;
	call_data.serializer = NULL;
	return do_ltt_statedump(&call_data);
}
484 | ||
485 | static int __init statedump_init(void) | |
486 | { | |
487 | int ret; | |
488 | printk(KERN_DEBUG "LTT : State dump init\n"); | |
489 | ret = ltt_module_register(LTT_FUNCTION_STATEDUMP, | |
490 | ltt_statedump_start, THIS_MODULE); | |
491 | return ret; | |
492 | } | |
493 | ||
/*
 * Module exit point: unregisters the LTT_FUNCTION_STATEDUMP handler
 * installed by statedump_init().
 */
static void __exit statedump_exit(void)
{
	printk(KERN_DEBUG "LTT : State dump exit\n");
	ltt_module_unregister(LTT_FUNCTION_STATEDUMP);
}
499 | ||
/* Hook the init/exit routines into module load and unload. */
module_init(statedump_init)
module_exit(statedump_exit)

/* Module metadata. */
MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Jean-Hugues Deschenes");
MODULE_DESCRIPTION("Linux Trace Toolkit Statedump");