/* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only)
 *
 * lttng-statedump-impl.c
 *
 * Linux Trace Toolkit Next Generation Kernel State Dump
 *
 * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca>
 * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * Changes:
 *	Eric Clement: Add listing of network IP interface
 *	2006, 2007 Mathieu Desnoyers	Fix kernel threads
 *				Various updates
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/kthread.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/irqnr.h>
#include <linux/cpu.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/device.h>
#include <linux/fdtable.h>

#include <lttng-events.h>
#include <lttng-tracer.h>
#include <wrapper/namespace.h>
#include <wrapper/irq.h>
#include <wrapper/tracepoint.h>
#include <wrapper/genhd.h>
#include <wrapper/file.h>

#ifdef CONFIG_LTTNG_HAS_LIST_IRQ
#include <linux/irq.h>
#endif

/* Define the tracepoints, but do not build the probes */
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH instrumentation/events/lttng-module
#define TRACE_INCLUDE_FILE lttng-statedump
#define LTTNG_INSTRUMENTATION
#include <instrumentation/events/lttng-module/lttng-statedump.h>

DEFINE_TRACE(lttng_statedump_block_device);
DEFINE_TRACE(lttng_statedump_end);
DEFINE_TRACE(lttng_statedump_interrupt);
DEFINE_TRACE(lttng_statedump_file_descriptor);
DEFINE_TRACE(lttng_statedump_start);
DEFINE_TRACE(lttng_statedump_process_state);
DEFINE_TRACE(lttng_statedump_process_pid_ns);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
DEFINE_TRACE(lttng_statedump_process_cgroup_ns);
#endif
DEFINE_TRACE(lttng_statedump_process_ipc_ns);
#ifndef LTTNG_MNT_NS_MISSING_HEADER
DEFINE_TRACE(lttng_statedump_process_mnt_ns);
#endif
DEFINE_TRACE(lttng_statedump_process_net_ns);
DEFINE_TRACE(lttng_statedump_process_user_ns);
DEFINE_TRACE(lttng_statedump_process_uts_ns);
DEFINE_TRACE(lttng_statedump_network_interface);
#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
DEFINE_TRACE(lttng_statedump_cpu_topology);
#endif

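/*
 * Context passed to the iterate_fd() callback when dumping the file
 * descriptors of a task: a scratch page for d_path(), the target
 * session, and the files_struct being walked.
 */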
struct lttng_fd_ctx {
	char *page;
	struct lttng_session *session;
	struct files_struct *files;
};

/*
 * Protected by the trace lock.
 */
static struct delayed_work cpu_work[NR_CPUS];
static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);
static atomic_t kernel_threads_to_run;

enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,
	LTTNG_KERNEL_THREAD = 1,
};

enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};

enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};

enum lttng_process_status {
	LTTNG_UNNAMED = 0,
	LTTNG_WAIT_FORK = 1,
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,
};

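/*
 * Enumerate block devices: walk every device of disk type in the block
 * class and emit one lttng_statedump_block_device event per partition
 * (DISK_PITER_INCL_PART0 includes the whole-disk "partition 0").
 * Returns -ENOSYS when the wrappers cannot resolve the needed kernel
 * symbols.
 */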
static
int lttng_enumerate_block_devices(struct lttng_session *session)
{
	struct class *ptr_block_class;
	struct device_type *ptr_disk_type;
	struct class_dev_iter iter;
	struct device *dev;

	ptr_block_class = wrapper_get_block_class();
	if (!ptr_block_class)
		return -ENOSYS;
	ptr_disk_type = wrapper_get_disk_type();
	if (!ptr_disk_type) {
		return -ENOSYS;
	}
	class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct disk_part_iter piter;
		struct gendisk *disk = dev_to_disk(dev);
		struct hd_struct *part;

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
		while ((part = disk_part_iter_next(&piter))) {
			char name_buf[BDEVNAME_SIZE];
			char *p;

			p = wrapper_disk_name(disk, part->partno, name_buf);
			if (!p) {
				disk_part_iter_exit(&piter);
				class_dev_iter_exit(&iter);
				return -ENOSYS;
			}
			trace_lttng_statedump_block_device(session,
					part_devt(part), name_buf);
		}
		disk_part_iter_exit(&piter);
	}
	class_dev_iter_exit(&iter);
	return 0;
}

#ifdef CONFIG_INET

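/*
 * Emit one network_interface event per IPv4 address when the interface
 * is up; otherwise emit a single event with a NULL address so the
 * interface itself is still recorded.
 */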
static
void lttng_enumerate_device(struct lttng_session *session,
		struct net_device *dev)
{
	struct in_device *in_dev;
	struct in_ifaddr *ifa;

	if (dev->flags & IFF_UP) {
		in_dev = in_dev_get(dev);
		if (in_dev) {
			for (ifa = in_dev->ifa_list; ifa != NULL;
			     ifa = ifa->ifa_next) {
				trace_lttng_statedump_network_interface(
					session, dev, ifa);
			}
			in_dev_put(in_dev);
		}
	} else {
		trace_lttng_statedump_network_interface(
			session, dev, NULL);
	}
}

static
int lttng_enumerate_network_ip_interface(struct lttng_session *session)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for_each_netdev(&init_net, dev)
		lttng_enumerate_device(session, dev);
	read_unlock(&dev_base_lock);

	return 0;
}
#else /* CONFIG_INET */
static inline
int lttng_enumerate_network_ip_interface(struct lttng_session *session)
{
	return 0;
}
#endif /* CONFIG_INET */

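/*
 * iterate_fd() callback: emit one file_descriptor event per open fd,
 * resolving the path into the scratch page. Falls back to the dentry
 * name when d_path() fails (e.g. for paths unreachable from the
 * current root).
 */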
static
int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd)
{
	const struct lttng_fd_ctx *ctx = p;
	const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE);
	unsigned int flags = file->f_flags;
	struct fdtable *fdt;

	/*
	 * We don't expose kernel internal flags, only userspace-visible
	 * flags.
	 */
	flags &= ~FMODE_NONOTIFY;
	fdt = files_fdtable(ctx->files);
	/*
	 * We need to check here again whether fd is within the fdt
	 * max_fds range, because we might be seeing a different
	 * files_fdtable() than iterate_fd(), assuming only RCU is
	 * protecting the read. In reality, iterate_fd() holds
	 * file_lock, which should ensure the fdt does not change while
	 * the lock is taken, but we are not aware whether this is
	 * guaranteed or not, so play safe.
	 */
	if (fd < fdt->max_fds && close_on_exec(fd, fdt))
		flags |= O_CLOEXEC;
	if (IS_ERR(s)) {
		struct dentry *dentry = file->f_path.dentry;

		/* Make sure we give at least some info */
		spin_lock(&dentry->d_lock);
		trace_lttng_statedump_file_descriptor(ctx->session,
			ctx->files, fd, dentry->d_name.name, flags,
			file->f_mode);
		spin_unlock(&dentry->d_lock);
		goto end;
	}
	trace_lttng_statedump_file_descriptor(ctx->session,
		ctx->files, fd, s, flags, file->f_mode);
end:
	return 0;
}

/* Called with task lock held. */
static
void lttng_enumerate_files(struct lttng_session *session,
		struct files_struct *files,
		char *tmp)
{
	struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, };

	iterate_fd(files, 0, lttng_dump_one_fd, &ctx);
}

#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
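/*
 * Walk the possible-CPU mask and emit one cpu_topology event per CPU.
 * This is only built when the wrapper layer defines
 * LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY; the stub in the #else branch
 * keeps the caller unconditional on other configurations.
 */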
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	int cpu;
	const cpumask_t *cpumask = cpu_possible_mask;

	for (cpu = cpumask_first(cpumask); cpu < nr_cpu_ids;
			cpu = cpumask_next(cpu, cpumask)) {
		trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu));
	}

	return 0;
}
#else
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	return 0;
}
#endif

#if 0
/*
 * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section
 * (scheduling in atomic). Normally, the tasklist lock protects this kind of
 * iteration, but it is not exported to modules.
 */
static
void lttng_enumerate_task_vm_maps(struct lttng_session *session,
		struct task_struct *p)
{
	struct mm_struct *mm;
	struct vm_area_struct *map;
	unsigned long ino;

	/* get_task_mm does a task_lock... */
	mm = get_task_mm(p);
	if (!mm)
		return;

	map = mm->mmap;
	if (map) {
		down_read(&mm->mmap_sem);
		while (map) {
			if (map->vm_file)
				ino = map->vm_file->lttng_f_dentry->d_inode->i_ino;
			else
				ino = 0;
			trace_lttng_statedump_vm_map(session, p, map, ino);
			map = map->vm_next;
		}
		up_read(&mm->mmap_sem);
	}
	mmput(mm);
}

static
int lttng_enumerate_vm_maps(struct lttng_session *session)
{
	struct task_struct *p;

	rcu_read_lock();
	for_each_process(p)
		lttng_enumerate_task_vm_maps(session, p);
	rcu_read_unlock();
	return 0;
}
#endif

#ifdef CONFIG_LTTNG_HAS_LIST_IRQ

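/*
 * Emit one interrupt event per registered irqaction. The descriptor
 * lock is taken with local interrupts disabled so the action list
 * cannot change underneath the iteration.
 */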
static
int lttng_list_interrupts(struct lttng_session *session)
{
	unsigned int irq;
	unsigned long flags = 0;
	struct irq_desc *desc;

	/* needs irq_desc */
	for_each_irq_desc(irq, desc) {
		struct irqaction *action;
		const char *irq_chip_name =
			irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip";

		local_irq_save(flags);
		raw_spin_lock(&desc->lock);
		for (action = desc->action; action; action = action->next) {
			trace_lttng_statedump_interrupt(session,
				irq, irq_chip_name, action);
		}
		raw_spin_unlock(&desc->lock);
		local_irq_restore(flags);
	}
	return 0;
}
#else
static inline
int lttng_list_interrupts(struct lttng_session *session)
{
	return 0;
}
#endif

/*
 * Statedump the task's namespaces using the proc filesystem inode number as
 * the unique identifier. The user and pid ns are nested and will be dumped
 * recursively.
 *
 * Called with task lock held.
 */
static
void lttng_statedump_process_ns(struct lttng_session *session,
		struct task_struct *p,
		enum lttng_thread_type type,
		enum lttng_execution_mode mode,
		enum lttng_execution_submode submode,
		enum lttng_process_status status)
{
	struct nsproxy *proxy;
	struct pid_namespace *pid_ns;
	struct user_namespace *user_ns;

	/*
	 * The pid and user namespaces are special, they are nested and
	 * accessed with specific functions instead of the nsproxy struct
	 * like the other namespaces.
	 */
	pid_ns = task_active_pid_ns(p);
	do {
		trace_lttng_statedump_process_pid_ns(session, p, pid_ns);
		pid_ns = pid_ns ? pid_ns->parent : NULL;
	} while (pid_ns);

	user_ns = task_cred_xxx(p, user_ns);
	do {
		trace_lttng_statedump_process_user_ns(session, p, user_ns);
		/*
		 * trace_lttng_statedump_process_user_ns() internally
		 * checks whether user_ns is NULL. While this does not
		 * appear to be a possible return value for
		 * task_cred_xxx(), err on the safe side and check
		 * for NULL here as well to be consistent with the
		 * paranoid behavior of
		 * trace_lttng_statedump_process_user_ns().
		 */
		user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL;
	} while (user_ns);

	/*
	 * Back and forth on locking strategy within Linux upstream for nsproxy.
	 * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3
	 * "namespaces: Use task_lock and not rcu to protect nsproxy"
	 * for details.
	 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
		LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
	proxy = p->nsproxy;
#else
	rcu_read_lock();
	proxy = task_nsproxy(p);
#endif
	if (proxy) {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
		trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns);
#endif
		trace_lttng_statedump_process_ipc_ns(session, p, proxy->ipc_ns);
#ifndef LTTNG_MNT_NS_MISSING_HEADER
		trace_lttng_statedump_process_mnt_ns(session, p, proxy->mnt_ns);
#endif
		trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns);
		trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns);
	}
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
		LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
	/* (nothing) */
#else
	rcu_read_unlock();
#endif
}

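/*
 * Walk every thread of every process under RCU, classify its state and
 * thread type, then emit the process_state, namespace, and (once per
 * distinct files_struct) file descriptor events.
 */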
static
int lttng_enumerate_process_states(struct lttng_session *session)
{
	struct task_struct *g, *p;
	char *tmp;

	tmp = (char *) __get_free_page(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	rcu_read_lock();
	for_each_process(g) {
		struct files_struct *prev_files = NULL;

		p = g;
		do {
			enum lttng_execution_mode mode =
				LTTNG_MODE_UNKNOWN;
			enum lttng_execution_submode submode =
				LTTNG_UNKNOWN;
			enum lttng_process_status status;
			enum lttng_thread_type type;
			struct files_struct *files;

			task_lock(p);
			if (p->exit_state == EXIT_ZOMBIE)
				status = LTTNG_ZOMBIE;
			else if (p->exit_state == EXIT_DEAD)
				status = LTTNG_DEAD;
			else if (p->state == TASK_RUNNING) {
				/* Is this a forked child that has not run yet? */
				if (list_empty(&p->rt.run_list))
					status = LTTNG_WAIT_FORK;
				else
					/*
					 * All tasks are considered as wait_cpu;
					 * the viewer will sort out if the task
					 * was really running at this time.
					 */
					status = LTTNG_WAIT_CPU;
			} else if (p->state &
					(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) {
				/* Task is waiting for something to complete */
				status = LTTNG_WAIT;
			} else
				status = LTTNG_UNNAMED;
			submode = LTTNG_NONE;

			/*
			 * Verification of p->mm is to filter out kernel
			 * threads; the viewer will further filter out if a
			 * user-space thread was in syscall mode or not.
			 */
			if (p->mm)
				type = LTTNG_USER_THREAD;
			else
				type = LTTNG_KERNEL_THREAD;
			files = p->files;

			trace_lttng_statedump_process_state(session,
				p, type, mode, submode, status, files);
			lttng_statedump_process_ns(session,
				p, type, mode, submode, status);
			/*
			 * As an optimisation for the common case, do not
			 * repeat information for the same files_struct in
			 * two consecutive threads. This is the common case
			 * for threads sharing the same fd table. RCU guarantees
			 * that the same files_struct pointer is not re-used
			 * throughout the processes/threads iteration.
			 */
			if (files && files != prev_files) {
				lttng_enumerate_files(session, files, tmp);
				prev_files = files;
			}
			task_unlock(p);
		} while_each_thread(g, p);
	}
	rcu_read_unlock();

	free_page((unsigned long) tmp);

	return 0;
}

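/*
 * Per-CPU rendezvous: each queued work decrements the counter; the
 * last one to run wakes up do_lttng_statedump(), which waits until
 * every online CPU has gone through the workqueue.
 */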
static
void lttng_statedump_work_func(struct work_struct *work)
{
	if (atomic_dec_and_test(&kernel_threads_to_run))
		/* If we are the last thread, wake up do_lttng_statedump */
		wake_up(&statedump_wq);
}

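/*
 * Emit the start event, run each enumeration step in turn (bailing out
 * on the first hard error; a missing block-device API only warns),
 * then schedule the per-CPU works and wait for them before emitting
 * the end event.
 */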
static
int do_lttng_statedump(struct lttng_session *session)
{
	int cpu, ret;

	trace_lttng_statedump_start(session);
	ret = lttng_enumerate_process_states(session);
	if (ret)
		return ret;
	/*
	 * FIXME
	 * ret = lttng_enumerate_vm_maps(session);
	 * if (ret)
	 *	return ret;
	 */
	ret = lttng_list_interrupts(session);
	if (ret)
		return ret;
	ret = lttng_enumerate_network_ip_interface(session);
	if (ret)
		return ret;
	ret = lttng_enumerate_block_devices(session);
	switch (ret) {
	case 0:
		break;
	case -ENOSYS:
		printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n");
		break;
	default:
		return ret;
	}
	ret = lttng_enumerate_cpu_topology(session);
	if (ret)
		return ret;

	/* TODO lttng_dump_idt_table(session); */
	/* TODO lttng_dump_softirq_vec(session); */
	/* TODO lttng_list_modules(session); */
	/* TODO lttng_dump_swap_files(session); */

	/*
	 * Fire off a work queue on each CPU. Their sole purpose in life
	 * is to guarantee that each CPU has been in a state where it was in
	 * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ).
	 */
	get_online_cpus();
	atomic_set(&kernel_threads_to_run, num_online_cpus());
	for_each_online_cpu(cpu) {
		INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func);
		schedule_delayed_work_on(cpu, &cpu_work[cpu], 0);
	}
	/* Wait for all threads to run */
	__wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0));
	put_online_cpus();
	/* Our work is done */
	trace_lttng_statedump_end(session);
	return 0;
}

/*
 * Called with session mutex held.
 */
int lttng_statedump_start(struct lttng_session *session)
{
	return do_lttng_statedump(session);
}
EXPORT_SYMBOL_GPL(lttng_statedump_start);

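/*
 * Usage sketch (illustrative, not part of this module): a caller that
 * already holds the session mutex would trigger a dump with
 *
 *	ret = lttng_statedump_start(session);
 *	if (ret)
 *		printk(KERN_WARNING "LTTng: statedump failed\n");
 */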

static
int __init lttng_statedump_init(void)
{
	/*
	 * Allow module to load even if the fixup cannot be done. This
	 * will allow seamless transition when the underlying issue fix
	 * is merged into the Linux kernel, and when tracepoint.c
	 * "tracepoint_module_notify" is turned into a static function.
	 */
	(void) wrapper_lttng_fixup_sig(THIS_MODULE);
	return 0;
}

module_init(lttng_statedump_init);

static
void __exit lttng_statedump_exit(void)
{
}

module_exit(lttng_statedump_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Jean-Hugues Deschenes");
MODULE_DESCRIPTION("LTTng statedump provider");
MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "."
	__stringify(LTTNG_MODULES_MINOR_VERSION) "."
	__stringify(LTTNG_MODULES_PATCHLEVEL_VERSION)
	LTTNG_MODULES_EXTRAVERSION);