Fix: timer_expire_entry changed in 4.19.312
[lttng-modules.git] / src / lttng-statedump-impl.c
1 /* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only)
2 *
 * lttng-statedump-impl.c
4 *
5 * Linux Trace Toolkit Next Generation Kernel State Dump
6 *
7 * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca>
8 * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 *
10 * Changes:
11 * Eric Clement: Add listing of network IP interface
12 * 2006, 2007 Mathieu Desnoyers Fix kernel threads
13 * Various updates
14 */
15
16 #include <linux/init.h>
17 #include <linux/module.h>
18 #include <linux/netlink.h>
19 #include <linux/inet.h>
20 #include <linux/ip.h>
21 #include <linux/kthread.h>
22 #include <linux/proc_fs.h>
23 #include <linux/file.h>
24 #include <linux/interrupt.h>
25 #include <linux/irqnr.h>
26 #include <linux/cpu.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/sched.h>
30 #include <linux/mm.h>
31 #include <linux/swap.h>
32 #include <linux/wait.h>
33 #include <linux/mutex.h>
34 #include <linux/device.h>
35
36 #include <lttng/events.h>
37 #include <lttng/tracer.h>
38 #include <wrapper/irqdesc.h>
39 #include <wrapper/fdtable.h>
40 #include <wrapper/namespace.h>
41 #include <wrapper/irq.h>
42 #include <wrapper/tracepoint.h>
43 #include <wrapper/genhd.h>
44 #include <wrapper/file.h>
45 #include <wrapper/fdtable.h>
46
47 #ifdef CONFIG_LTTNG_HAS_LIST_IRQ
48 #include <linux/irq.h>
49 #endif
50
51 /* Define the tracepoints, but do not build the probes */
52 #define CREATE_TRACE_POINTS
53 #define TRACE_INCLUDE_PATH instrumentation/events
54 #define TRACE_INCLUDE_FILE lttng-statedump
55 #define LTTNG_INSTRUMENTATION
56 #include <instrumentation/events/lttng-statedump.h>
57
58 DEFINE_TRACE(lttng_statedump_block_device);
59 DEFINE_TRACE(lttng_statedump_end);
60 DEFINE_TRACE(lttng_statedump_interrupt);
61 DEFINE_TRACE(lttng_statedump_file_descriptor);
62 DEFINE_TRACE(lttng_statedump_start);
63 DEFINE_TRACE(lttng_statedump_process_state);
64 DEFINE_TRACE(lttng_statedump_process_pid_ns);
65 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
66 DEFINE_TRACE(lttng_statedump_process_cgroup_ns);
67 #endif
68 DEFINE_TRACE(lttng_statedump_process_ipc_ns);
69 #ifndef LTTNG_MNT_NS_MISSING_HEADER
70 DEFINE_TRACE(lttng_statedump_process_mnt_ns);
71 #endif
72 DEFINE_TRACE(lttng_statedump_process_net_ns);
73 DEFINE_TRACE(lttng_statedump_process_user_ns);
74 DEFINE_TRACE(lttng_statedump_process_uts_ns);
75 DEFINE_TRACE(lttng_statedump_process_time_ns);
76 DEFINE_TRACE(lttng_statedump_network_interface);
77 #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
78 DEFINE_TRACE(lttng_statedump_cpu_topology);
79 #endif
80
/* Context passed to the per-fd iteration callback (lttng_dump_one_fd). */
struct lttng_fd_ctx {
	char *page;			/* scratch page handed to d_path() */
	struct lttng_session *session;	/* session receiving statedump events */
	struct files_struct *files;	/* fd table currently being iterated */
};

/*
 * Protected by the trace lock.
 */
static struct delayed_work cpu_work[NR_CPUS];	/* one work item per CPU */
static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);	/* do_lttng_statedump() waits here */
static atomic_t kernel_threads_to_run;		/* per-CPU work items still pending */

/* Thread classification emitted in process-state events. */
enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,
	LTTNG_KERNEL_THREAD = 1,
};

/* Execution mode of a task at statedump time. */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};

/* Execution submode; only LTTNG_NONE is currently emitted by this file. */
enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};

/* Scheduling status reported for each task in the process-state event. */
enum lttng_process_status {
	LTTNG_UNNAMED = 0,
	LTTNG_WAIT_FORK = 1,
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,
};
123
/*
 * Emit one lttng_statedump_block_device event per partition (including
 * partition 0, i.e. the whole disk) of every non-empty block device.
 *
 * Returns 0 on success, -ENOSYS when the required unexported kernel
 * symbols cannot be resolved by the wrappers or bdevname() fails.
 */
static
int lttng_enumerate_block_devices(struct lttng_session *session)
{
	struct class *ptr_block_class;
	struct device_type *ptr_disk_type;
	struct class_dev_iter iter;
	struct device *dev;

	/*
	 * The block class and disk device type are not exported to
	 * modules; they are looked up at runtime through wrappers which
	 * may fail on some kernels.
	 */
	ptr_block_class = wrapper_get_block_class();
	if (!ptr_block_class)
		return -ENOSYS;
	ptr_disk_type = wrapper_get_disk_type();
	if (!ptr_disk_type) {
		return -ENOSYS;
	}
	class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct disk_part_iter piter;
		struct gendisk *disk = dev_to_disk(dev);
		struct hd_struct *part;

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
		while ((part = disk_part_iter_next(&piter))) {
			struct block_device bdev;
			char name_buf[BDEVNAME_SIZE];
			const char *p;

			/*
			 * Create a partial 'struct blockdevice' to use
			 * 'bdevname()' which is a simple wrapper over
			 * 'disk_name()' but has the honor to be EXPORT_SYMBOL.
			 * Only bd_disk and bd_part are initialized: bdevname()
			 * reads nothing else from the structure.
			 */
			bdev.bd_disk = disk;
			bdev.bd_part = part;

			p = bdevname(&bdev, name_buf);
			if (!p) {
				/* Tear down both iterators before bailing out. */
				disk_part_iter_exit(&piter);
				class_dev_iter_exit(&iter);
				return -ENOSYS;
			}
			trace_lttng_statedump_block_device(session,
					part_devt(part), name_buf);
		}
		disk_part_iter_exit(&piter);
	}
	class_dev_iter_exit(&iter);
	return 0;
}
181
182 #ifdef CONFIG_INET
183
184 static
185 void lttng_enumerate_device(struct lttng_session *session,
186 struct net_device *dev)
187 {
188 struct in_device *in_dev;
189 struct in_ifaddr *ifa;
190
191 if (dev->flags & IFF_UP) {
192 in_dev = in_dev_get(dev);
193 if (in_dev) {
194 for (ifa = in_dev->ifa_list; ifa != NULL;
195 ifa = ifa->ifa_next) {
196 trace_lttng_statedump_network_interface(
197 session, dev, ifa);
198 }
199 in_dev_put(in_dev);
200 }
201 } else {
202 trace_lttng_statedump_network_interface(
203 session, dev, NULL);
204 }
205 }
206
207 static
208 int lttng_enumerate_network_ip_interface(struct lttng_session *session)
209 {
210 struct net_device *dev;
211
212 read_lock(&dev_base_lock);
213 for_each_netdev(&init_net, dev)
214 lttng_enumerate_device(session, dev);
215 read_unlock(&dev_base_lock);
216
217 return 0;
218 }
219 #else /* CONFIG_INET */
220 static inline
221 int lttng_enumerate_network_ip_interface(struct lttng_session *session)
222 {
223 return 0;
224 }
225 #endif /* CONFIG_INET */
226
/*
 * Callback invoked by lttng_iterate_fd() for each open file descriptor.
 *
 * Emits a lttng_statedump_file_descriptor event carrying the fd number,
 * the resolved path (or the dentry name if d_path() fails) and the
 * userspace-visible open flags. Always returns 0 so the iteration
 * continues over all descriptors.
 */
static
int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd)
{
	const struct lttng_fd_ctx *ctx = p;
	const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE);
	unsigned int flags = file->f_flags;
	struct fdtable *fdt;

	/*
	 * We don't expose kernel internal flags, only userspace-visible
	 * flags.
	 */
	flags &= ~FMODE_NONOTIFY;
	fdt = files_fdtable(ctx->files);
	/*
	 * We need to check here again whether fd is within the fdt
	 * max_fds range, because we might be seeing a different
	 * files_fdtable() than iterate_fd(), assuming only RCU is
	 * protecting the read. In reality, iterate_fd() holds
	 * file_lock, which should ensure the fdt does not change while
	 * the lock is taken, but we are not aware whether this is
	 * guaranteed or not, so play safe.
	 */
	if (fd < fdt->max_fds && lttng_close_on_exec(fd, fdt))
		flags |= O_CLOEXEC;
	if (IS_ERR(s)) {
		struct dentry *dentry = file->f_path.dentry;

		/* Make sure we give at least some info */
		spin_lock(&dentry->d_lock);
		trace_lttng_statedump_file_descriptor(ctx->session,
			ctx->files, fd, dentry->d_name.name, flags,
			file->f_mode);
		spin_unlock(&dentry->d_lock);
		goto end;
	}
	trace_lttng_statedump_file_descriptor(ctx->session,
		ctx->files, fd, s, flags, file->f_mode);
end:
	return 0;
}
268
269 /* Called with task lock held. */
270 static
271 void lttng_enumerate_files(struct lttng_session *session,
272 struct files_struct *files,
273 char *tmp)
274 {
275 struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, };
276
277 lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx);
278 }
279
#ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
/*
 * Emit one lttng_statedump_cpu_topology event for every possible CPU.
 * Always returns 0.
 */
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	int cpu;

	/* Iterates cpu_possible_mask, exactly as the cpumask helpers do. */
	for_each_possible_cpu(cpu) {
		trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu));
	}

	return 0;
}
#else
/* CPU topology statedump is not available on this kernel/architecture. */
static
int lttng_enumerate_cpu_topology(struct lttng_session *session)
{
	return 0;
}
#endif
301
#if 0
/*
 * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section
 * (scheduling in atomic). Normally, the tasklist lock protects this kind of
 * iteration, but it is not exported to modules.
 *
 * NOTE: this whole section is intentionally compiled out (#if 0) until the
 * locking issue above is resolved; see the commented-out call site in
 * do_lttng_statedump().
 */
static
void lttng_enumerate_task_vm_maps(struct lttng_session *session,
		struct task_struct *p)
{
	struct mm_struct *mm;
	struct vm_area_struct *map;
	unsigned long ino;

	/* get_task_mm does a task_lock... */
	mm = get_task_mm(p);
	if (!mm)
		return;

	map = mm->mmap;
	if (map) {
		down_read(&mm->mmap_sem);
		while (map) {
			if (map->vm_file)
				ino = map->vm_file->lttng_f_dentry->d_inode->i_ino;
			else
				ino = 0;
			trace_lttng_statedump_vm_map(session, p, map, ino);
			map = map->vm_next;
		}
		up_read(&mm->mmap_sem);
	}
	mmput(mm);
}

static
int lttng_enumerate_vm_maps(struct lttng_session *session)
{
	struct task_struct *p;

	rcu_read_lock();
	for_each_process(p)
		lttng_enumerate_task_vm_maps(session, p);
	rcu_read_unlock();
	return 0;
}
#endif
349
#ifdef CONFIG_LTTNG_HAS_LIST_IRQ

/*
 * Emit one lttng_statedump_interrupt event per registered irqaction of
 * every interrupt descriptor. Always returns 0.
 */
static
int lttng_list_interrupts(struct lttng_session *session)
{
	unsigned int irq;
	unsigned long flags = 0;
	struct irq_desc *desc;

/*
 * for_each_irq_desc() expands to irq_to_desc(), which is not exported to
 * modules; redirect it to the lttng wrapper for the scope of this loop.
 */
#define irq_to_desc wrapper_irq_to_desc
	/* needs irq_desc */
	for_each_irq_desc(irq, desc) {
		struct irqaction *action;
		const char *irq_chip_name =
			irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip";

		/* Hold the descriptor lock, irqs off, while walking actions. */
		local_irq_save(flags);
		raw_spin_lock(&desc->lock);
		for (action = desc->action; action; action = action->next) {
			trace_lttng_statedump_interrupt(session,
				irq, irq_chip_name, action);
		}
		raw_spin_unlock(&desc->lock);
		local_irq_restore(flags);
	}
	return 0;
#undef irq_to_desc
}
#else
/* Interrupt listing is not supported on this kernel configuration. */
static inline
int lttng_list_interrupts(struct lttng_session *session)
{
	return 0;
}
#endif
385
/*
 * Statedump the task's namespaces using the proc filesystem inode number as
 * the unique identifier. The user and pid ns are nested and will be dumped
 * recursively.
 *
 * Called with task lock held.
 */
static
void lttng_statedump_process_ns(struct lttng_session *session,
		struct task_struct *p,
		enum lttng_thread_type type,
		enum lttng_execution_mode mode,
		enum lttng_execution_submode submode,
		enum lttng_process_status status)
{
	struct nsproxy *proxy;
	struct pid_namespace *pid_ns;
	struct user_namespace *user_ns;

	/*
	 * The pid and user namespaces are special, they are nested and
	 * accessed with specific functions instead of the nsproxy struct
	 * like the other namespaces.
	 */
	pid_ns = task_active_pid_ns(p);
	do {
		trace_lttng_statedump_process_pid_ns(session, p, pid_ns);
		/* Walk up towards the root pid namespace. */
		pid_ns = pid_ns ? pid_ns->parent : NULL;
	} while (pid_ns);


	user_ns = task_cred_xxx(p, user_ns);
	do {
		trace_lttng_statedump_process_user_ns(session, p, user_ns);
		/*
		 * trace_lttng_statedump_process_user_ns() internally
		 * checks whether user_ns is NULL. While this does not
		 * appear to be a possible return value for
		 * task_cred_xxx(), err on the safe side and check
		 * for NULL here as well to be consistent with the
		 * paranoid behavior of
		 * trace_lttng_statedump_process_user_ns().
		 */
		user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL;
	} while (user_ns);

	/*
	 * Back and forth on locking strategy within Linux upstream for nsproxy.
	 * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3
	 * "namespaces: Use task_lock and not rcu to protect nsproxy"
	 * for details.
	 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
		LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
	/* nsproxy is protected by the task lock we already hold. */
	proxy = p->nsproxy;
#else
	/* Older kernels: nsproxy is RCU-protected. */
	rcu_read_lock();
	proxy = task_nsproxy(p);
#endif
	if (proxy) {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
		trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns);
#endif
		trace_lttng_statedump_process_ipc_ns(session, p, proxy->ipc_ns);
#ifndef LTTNG_MNT_NS_MISSING_HEADER
		trace_lttng_statedump_process_mnt_ns(session, p, proxy->mnt_ns);
#endif
		trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns);
		trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0))
		trace_lttng_statedump_process_time_ns(session, p, proxy->time_ns);
#endif
	}
	/*
	 * Pairs with the conditional rcu_read_lock() above: only unlock on
	 * kernels where nsproxy access is RCU-protected.
	 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
		LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
		LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
	/* (nothing) */
#else
	rcu_read_unlock();
#endif
}
470
/*
 * Walk every process and thread in the system, emitting process-state,
 * namespace, and (once per distinct fd table) file-descriptor events.
 *
 * Returns 0 on success, -ENOMEM if the scratch page for path resolution
 * cannot be allocated.
 */
static
int lttng_enumerate_process_states(struct lttng_session *session)
{
	struct task_struct *g, *p;
	char *tmp;

	/* Scratch page for d_path() in lttng_enumerate_files(). */
	tmp = (char *) __get_free_page(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	rcu_read_lock();
	for_each_process(g) {
		struct files_struct *prev_files = NULL;

		p = g;
		do {
			enum lttng_execution_mode mode =
				LTTNG_MODE_UNKNOWN;
			enum lttng_execution_submode submode =
				LTTNG_UNKNOWN;
			enum lttng_process_status status;
			enum lttng_thread_type type;
			struct files_struct *files;

			task_lock(p);
			if (p->exit_state == EXIT_ZOMBIE)
				status = LTTNG_ZOMBIE;
			else if (p->exit_state == EXIT_DEAD)
				status = LTTNG_DEAD;
			else if (p->state == TASK_RUNNING) {
				/* Is this a forked child that has not run yet? */
				if (list_empty(&p->rt.run_list))
					status = LTTNG_WAIT_FORK;
				else
					/*
					 * All tasks are considered as wait_cpu;
					 * the viewer will sort out if the task
					 * was really running at this time.
					 */
					status = LTTNG_WAIT_CPU;
			} else if (p->state &
				(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) {
				/* Task is waiting for something to complete */
				status = LTTNG_WAIT;
			} else
				status = LTTNG_UNNAMED;
			submode = LTTNG_NONE;

			/*
			 * Verification of t->mm is to filter out kernel
			 * threads; Viewer will further filter out if a
			 * user-space thread was in syscall mode or not.
			 */
			if (p->mm)
				type = LTTNG_USER_THREAD;
			else
				type = LTTNG_KERNEL_THREAD;
			files = p->files;

			trace_lttng_statedump_process_state(session,
				p, type, mode, submode, status, files);
			/* Task lock is still held, as required by the callee. */
			lttng_statedump_process_ns(session,
				p, type, mode, submode, status);
			/*
			 * As an optimisation for the common case, do not
			 * repeat information for the same files_struct in
			 * two consecutive threads. This is the common case
			 * for threads sharing the same fd table. RCU guarantees
			 * that the same files_struct pointer is not re-used
			 * throughout processes/threads iteration.
			 */
			if (files && files != prev_files) {
				lttng_enumerate_files(session, files, tmp);
				prev_files = files;
			}
			task_unlock(p);
		} while_each_thread(g, p);
	}
	rcu_read_unlock();

	free_page((unsigned long) tmp);

	return 0;
}
555
/*
 * Per-CPU delayed-work handler scheduled by do_lttng_statedump().
 * Its only job is to decrement the pending-worker count; the last
 * worker to finish wakes up the statedump waiter.
 */
static
void lttng_statedump_work_func(struct work_struct *work)
{
	if (atomic_dec_and_test(&kernel_threads_to_run))
		/* If we are the last thread, wake up do_lttng_statedump */
		wake_up(&statedump_wq);
}
563
/*
 * Run the full kernel statedump for @session: processes, interrupts,
 * network interfaces, block devices and CPU topology, bracketed by
 * statedump start/end events.
 *
 * Returns 0 on success or a negative errno from the first failing
 * enumeration step (block-device -ENOSYS is reported but tolerated).
 */
static
int do_lttng_statedump(struct lttng_session *session)
{
	int cpu, ret;

	trace_lttng_statedump_start(session);
	ret = lttng_enumerate_process_states(session);
	if (ret)
		return ret;
	/*
	 * FIXME
	 * ret = lttng_enumerate_vm_maps(session);
	 * if (ret)
	 *	return ret;
	 */
	ret = lttng_list_interrupts(session);
	if (ret)
		return ret;
	ret = lttng_enumerate_network_ip_interface(session);
	if (ret)
		return ret;
	ret = lttng_enumerate_block_devices(session);
	switch (ret) {
	case 0:
		break;
	case -ENOSYS:
		/* Non-fatal: warn and keep dumping the remaining state. */
		printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n");
		break;
	default:
		return ret;
	}
	ret = lttng_enumerate_cpu_topology(session);
	if (ret)
		return ret;

	/* TODO lttng_dump_idt_table(session); */
	/* TODO lttng_dump_softirq_vec(session); */
	/* TODO lttng_list_modules(session); */
	/* TODO lttng_dump_swap_files(session); */

	/*
	 * Fire off a work queue on each CPU. Their sole purpose in life
	 * is to guarantee that each CPU has been in a state where is was in
	 * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ).
	 */
	get_online_cpus();
	atomic_set(&kernel_threads_to_run, num_online_cpus());
	for_each_online_cpu(cpu) {
		INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func);
		schedule_delayed_work_on(cpu, &cpu_work[cpu], 0);
	}
	/* Wait for all threads to run */
	__wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0));
	put_online_cpus();
	/* Our work is done */
	trace_lttng_statedump_end(session);
	return 0;
}
622
/*
 * Public entry point to trigger a statedump for @session.
 *
 * Called with session mutex held.
 */
int lttng_statedump_start(struct lttng_session *session)
{
	return do_lttng_statedump(session);
}
EXPORT_SYMBOL_GPL(lttng_statedump_start);
631
/* Module init: apply the tracepoint signature fixup, best effort. */
static
int __init lttng_statedump_init(void)
{
	/*
	 * Allow module to load even if the fixup cannot be done. This
	 * will allow seemless transition when the underlying issue fix
	 * is merged into the Linux kernel, and when tracepoint.c
	 * "tracepoint_module_notify" is turned into a static function.
	 */
	(void) wrapper_lttng_fixup_sig(THIS_MODULE);
	return 0;
}

module_init(lttng_statedump_init);
646
/* Module exit: nothing to tear down; all state is per-statedump-run. */
static
void __exit lttng_statedump_exit(void)
{
}

module_exit(lttng_statedump_exit);
653
654 MODULE_LICENSE("GPL and additional rights");
655 MODULE_AUTHOR("Jean-Hugues Deschenes");
656 MODULE_DESCRIPTION("LTTng statedump provider");
657 MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "."
658 __stringify(LTTNG_MODULES_MINOR_VERSION) "."
659 __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION)
660 LTTNG_MODULES_EXTRAVERSION);
This page took 0.041747 seconds and 4 git commands to generate.