/*
 * wrapper: remove perf wrapper
 * lttng-modules.git / lttng-statedump-impl.c
 */
1 /* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only)
2 *
3 * lttng-statedump.c
4 *
5 * Linux Trace Toolkit Next Generation Kernel State Dump
6 *
7 * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca>
8 * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 *
10 * Changes:
11 * Eric Clement: Add listing of network IP interface
12 * 2006, 2007 Mathieu Desnoyers Fix kernel threads
13 * Various updates
14 */
15
16 #include <linux/init.h>
17 #include <linux/module.h>
18 #include <linux/netlink.h>
19 #include <linux/inet.h>
20 #include <linux/ip.h>
21 #include <linux/kthread.h>
22 #include <linux/proc_fs.h>
23 #include <linux/file.h>
24 #include <linux/interrupt.h>
25 #include <linux/irqnr.h>
26 #include <linux/cpu.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/sched.h>
30 #include <linux/mm.h>
31 #include <linux/swap.h>
32 #include <linux/wait.h>
33 #include <linux/mutex.h>
34 #include <linux/device.h>
35 #include <linux/fdtable.h>
36 #include <linux/irq.h>
37
38 #include <lttng-events.h>
39 #include <lttng-tracer.h>
40 #include <wrapper/namespace.h>
41 #include <wrapper/genhd.h>
42
43 /* Define the tracepoints, but do not build the probes */
44 #define CREATE_TRACE_POINTS
45 #define TRACE_INCLUDE_PATH instrumentation/events/lttng-module
46 #define TRACE_INCLUDE_FILE lttng-statedump
47 #define LTTNG_INSTRUMENTATION
48 #include <instrumentation/events/lttng-module/lttng-statedump.h>
49
50 DEFINE_TRACE(lttng_statedump_block_device);
51 DEFINE_TRACE(lttng_statedump_end);
52 DEFINE_TRACE(lttng_statedump_interrupt);
53 DEFINE_TRACE(lttng_statedump_file_descriptor);
54 DEFINE_TRACE(lttng_statedump_start);
55 DEFINE_TRACE(lttng_statedump_process_state);
56 DEFINE_TRACE(lttng_statedump_process_pid_ns);
57 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
58 DEFINE_TRACE(lttng_statedump_process_cgroup_ns);
59 #endif
60 DEFINE_TRACE(lttng_statedump_process_ipc_ns);
61 #ifndef LTTNG_MNT_NS_MISSING_HEADER
62 DEFINE_TRACE(lttng_statedump_process_mnt_ns);
63 #endif
64 DEFINE_TRACE(lttng_statedump_process_net_ns);
65 DEFINE_TRACE(lttng_statedump_process_user_ns);
66 DEFINE_TRACE(lttng_statedump_process_uts_ns);
67 DEFINE_TRACE(lttng_statedump_network_interface);
68 #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
69 DEFINE_TRACE(lttng_statedump_cpu_topology);
70 #endif
71
72 struct lttng_fd_ctx {
73 char *page;
74 struct lttng_session *session;
75 struct files_struct *files;
76 };
77
78 /*
79 * Protected by the trace lock.
80 */
81 static struct delayed_work cpu_work[NR_CPUS];
82 static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);
83 static atomic_t kernel_threads_to_run;
84
85 enum lttng_thread_type {
86 LTTNG_USER_THREAD = 0,
87 LTTNG_KERNEL_THREAD = 1,
88 };
89
90 enum lttng_execution_mode {
91 LTTNG_USER_MODE = 0,
92 LTTNG_SYSCALL = 1,
93 LTTNG_TRAP = 2,
94 LTTNG_IRQ = 3,
95 LTTNG_SOFTIRQ = 4,
96 LTTNG_MODE_UNKNOWN = 5,
97 };
98
99 enum lttng_execution_submode {
100 LTTNG_NONE = 0,
101 LTTNG_UNKNOWN = 1,
102 };
103
104 enum lttng_process_status {
105 LTTNG_UNNAMED = 0,
106 LTTNG_WAIT_FORK = 1,
107 LTTNG_WAIT_CPU = 2,
108 LTTNG_EXIT = 3,
109 LTTNG_ZOMBIE = 4,
110 LTTNG_WAIT = 5,
111 LTTNG_RUN = 6,
112 LTTNG_DEAD = 7,
113 };
114
115 static
116 int lttng_enumerate_block_devices(struct lttng_session *session)
117 {
118 struct class *ptr_block_class;
119 struct device_type *ptr_disk_type;
120 struct class_dev_iter iter;
121 struct device *dev;
122
123 ptr_block_class = wrapper_get_block_class();
124 if (!ptr_block_class)
125 return -ENOSYS;
126 ptr_disk_type = wrapper_get_disk_type();
127 if (!ptr_disk_type) {
128 return -ENOSYS;
129 }
130 class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type);
131 while ((dev = class_dev_iter_next(&iter))) {
132 struct disk_part_iter piter;
133 struct gendisk *disk = dev_to_disk(dev);
134 struct hd_struct *part;
135
136 /*
137 * Don't show empty devices or things that have been
138 * suppressed
139 */
140 if (get_capacity(disk) == 0 ||
141 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
142 continue;
143
144 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
145 while ((part = disk_part_iter_next(&piter))) {
146 char name_buf[BDEVNAME_SIZE];
147 char *p;
148
149 p = wrapper_disk_name(disk, part->partno, name_buf);
150 if (!p) {
151 disk_part_iter_exit(&piter);
152 class_dev_iter_exit(&iter);
153 return -ENOSYS;
154 }
155 trace_lttng_statedump_block_device(session,
156 part_devt(part), name_buf);
157 }
158 disk_part_iter_exit(&piter);
159 }
160 class_dev_iter_exit(&iter);
161 return 0;
162 }
163
164 #ifdef CONFIG_INET
165
166 static
167 void lttng_enumerate_device(struct lttng_session *session,
168 struct net_device *dev)
169 {
170 struct in_device *in_dev;
171 struct in_ifaddr *ifa;
172
173 if (dev->flags & IFF_UP) {
174 in_dev = in_dev_get(dev);
175 if (in_dev) {
176 for (ifa = in_dev->ifa_list; ifa != NULL;
177 ifa = ifa->ifa_next) {
178 trace_lttng_statedump_network_interface(
179 session, dev, ifa);
180 }
181 in_dev_put(in_dev);
182 }
183 } else {
184 trace_lttng_statedump_network_interface(
185 session, dev, NULL);
186 }
187 }
188
189 static
190 int lttng_enumerate_network_ip_interface(struct lttng_session *session)
191 {
192 struct net_device *dev;
193
194 read_lock(&dev_base_lock);
195 for_each_netdev(&init_net, dev)
196 lttng_enumerate_device(session, dev);
197 read_unlock(&dev_base_lock);
198
199 return 0;
200 }
201 #else /* CONFIG_INET */
202 static inline
203 int lttng_enumerate_network_ip_interface(struct lttng_session *session)
204 {
205 return 0;
206 }
207 #endif /* CONFIG_INET */
208
209 static
210 int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd)
211 {
212 const struct lttng_fd_ctx *ctx = p;
213 const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE);
214 unsigned int flags = file->f_flags;
215 struct fdtable *fdt;
216
217 /*
218 * We don't expose kernel internal flags, only userspace-visible
219 * flags.
220 */
221 flags &= ~FMODE_NONOTIFY;
222 fdt = files_fdtable(ctx->files);
223 /*
224 * We need to check here again whether fd is within the fdt
225 * max_fds range, because we might be seeing a different
226 * files_fdtable() than iterate_fd(), assuming only RCU is
227 * protecting the read. In reality, iterate_fd() holds
228 * file_lock, which should ensure the fdt does not change while
229 * the lock is taken, but we are not aware whether this is
230 * guaranteed or not, so play safe.
231 */
232 if (fd < fdt->max_fds && close_on_exec(fd, fdt))
233 flags |= O_CLOEXEC;
234 if (IS_ERR(s)) {
235 struct dentry *dentry = file->f_path.dentry;
236
237 /* Make sure we give at least some info */
238 spin_lock(&dentry->d_lock);
239 trace_lttng_statedump_file_descriptor(ctx->session,
240 ctx->files, fd, dentry->d_name.name, flags,
241 file->f_mode);
242 spin_unlock(&dentry->d_lock);
243 goto end;
244 }
245 trace_lttng_statedump_file_descriptor(ctx->session,
246 ctx->files, fd, s, flags, file->f_mode);
247 end:
248 return 0;
249 }
250
251 /* Called with task lock held. */
252 static
253 void lttng_enumerate_files(struct lttng_session *session,
254 struct files_struct *files,
255 char *tmp)
256 {
257 struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, };
258
259 iterate_fd(files, 0, lttng_dump_one_fd, &ctx);
260 }
261
262 #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY
263 static
264 int lttng_enumerate_cpu_topology(struct lttng_session *session)
265 {
266 int cpu;
267 const cpumask_t *cpumask = cpu_possible_mask;
268
269 for (cpu = cpumask_first(cpumask); cpu < nr_cpu_ids;
270 cpu = cpumask_next(cpu, cpumask)) {
271 trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu));
272 }
273
274 return 0;
275 }
276 #else
277 static
278 int lttng_enumerate_cpu_topology(struct lttng_session *session)
279 {
280 return 0;
281 }
282 #endif
283
284 #if 0
285 /*
286 * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section
287 * (scheduling in atomic). Normally, the tasklist lock protects this kind of
288 * iteration, but it is not exported to modules.
289 */
290 static
291 void lttng_enumerate_task_vm_maps(struct lttng_session *session,
292 struct task_struct *p)
293 {
294 struct mm_struct *mm;
295 struct vm_area_struct *map;
296 unsigned long ino;
297
298 /* get_task_mm does a task_lock... */
299 mm = get_task_mm(p);
300 if (!mm)
301 return;
302
303 map = mm->mmap;
304 if (map) {
305 down_read(&mm->mmap_sem);
306 while (map) {
307 if (map->vm_file)
308 ino = map->vm_file->f_path.dentry->d_inode->i_ino;
309 else
310 ino = 0;
311 trace_lttng_statedump_vm_map(session, p, map, ino);
312 map = map->vm_next;
313 }
314 up_read(&mm->mmap_sem);
315 }
316 mmput(mm);
317 }
318
319 static
320 int lttng_enumerate_vm_maps(struct lttng_session *session)
321 {
322 struct task_struct *p;
323
324 rcu_read_lock();
325 for_each_process(p)
326 lttng_enumerate_task_vm_maps(session, p);
327 rcu_read_unlock();
328 return 0;
329 }
330 #endif
331
332 static
333 int lttng_list_interrupts(struct lttng_session *session)
334 {
335 unsigned int irq;
336 unsigned long flags = 0;
337 struct irq_desc *desc;
338
339 /* needs irq_desc */
340 for_each_irq_desc(irq, desc) {
341 struct irqaction *action;
342 const char *irq_chip_name =
343 irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip";
344
345 local_irq_save(flags);
346 raw_spin_lock(&desc->lock);
347 for (action = desc->action; action; action = action->next) {
348 trace_lttng_statedump_interrupt(session,
349 irq, irq_chip_name, action);
350 }
351 raw_spin_unlock(&desc->lock);
352 local_irq_restore(flags);
353 }
354 return 0;
355 }
356
357 /*
358 * Statedump the task's namespaces using the proc filesystem inode number as
359 * the unique identifier. The user and pid ns are nested and will be dumped
360 * recursively.
361 *
362 * Called with task lock held.
363 */
364 static
365 void lttng_statedump_process_ns(struct lttng_session *session,
366 struct task_struct *p,
367 enum lttng_thread_type type,
368 enum lttng_execution_mode mode,
369 enum lttng_execution_submode submode,
370 enum lttng_process_status status)
371 {
372 struct nsproxy *proxy;
373 struct pid_namespace *pid_ns;
374 struct user_namespace *user_ns;
375
376 /*
377 * The pid and user namespaces are special, they are nested and
378 * accessed with specific functions instead of the nsproxy struct
379 * like the other namespaces.
380 */
381 pid_ns = task_active_pid_ns(p);
382 do {
383 trace_lttng_statedump_process_pid_ns(session, p, pid_ns);
384 pid_ns = pid_ns ? pid_ns->parent : NULL;
385 } while (pid_ns);
386
387
388 user_ns = task_cred_xxx(p, user_ns);
389 do {
390 trace_lttng_statedump_process_user_ns(session, p, user_ns);
391 /*
392 * trace_lttng_statedump_process_user_ns() internally
393 * checks whether user_ns is NULL. While this does not
394 * appear to be a possible return value for
395 * task_cred_xxx(), err on the safe side and check
396 * for NULL here as well to be consistent with the
397 * paranoid behavior of
398 * trace_lttng_statedump_process_user_ns().
399 */
400 user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL;
401 } while (user_ns);
402
403 /*
404 * Back and forth on locking strategy within Linux upstream for nsproxy.
405 * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3
406 * "namespaces: Use task_lock and not rcu to protect nsproxy"
407 * for details.
408 */
409 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
410 LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
411 LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
412 LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
413 proxy = p->nsproxy;
414 #else
415 rcu_read_lock();
416 proxy = task_nsproxy(p);
417 #endif
418 if (proxy) {
419 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
420 trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns);
421 #endif
422 trace_lttng_statedump_process_ipc_ns(session, p, proxy->ipc_ns);
423 #ifndef LTTNG_MNT_NS_MISSING_HEADER
424 trace_lttng_statedump_process_mnt_ns(session, p, proxy->mnt_ns);
425 #endif
426 trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns);
427 trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns);
428 }
429 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) || \
430 LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \
431 LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \
432 LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0))
433 /* (nothing) */
434 #else
435 rcu_read_unlock();
436 #endif
437 }
438
439 static
440 int lttng_enumerate_process_states(struct lttng_session *session)
441 {
442 struct task_struct *g, *p;
443 char *tmp;
444
445 tmp = (char *) __get_free_page(GFP_KERNEL);
446 if (!tmp)
447 return -ENOMEM;
448
449 rcu_read_lock();
450 for_each_process(g) {
451 struct files_struct *prev_files = NULL;
452
453 p = g;
454 do {
455 enum lttng_execution_mode mode =
456 LTTNG_MODE_UNKNOWN;
457 enum lttng_execution_submode submode =
458 LTTNG_UNKNOWN;
459 enum lttng_process_status status;
460 enum lttng_thread_type type;
461 struct files_struct *files;
462
463 task_lock(p);
464 if (p->exit_state == EXIT_ZOMBIE)
465 status = LTTNG_ZOMBIE;
466 else if (p->exit_state == EXIT_DEAD)
467 status = LTTNG_DEAD;
468 else if (p->state == TASK_RUNNING) {
469 /* Is this a forked child that has not run yet? */
470 if (list_empty(&p->rt.run_list))
471 status = LTTNG_WAIT_FORK;
472 else
473 /*
474 * All tasks are considered as wait_cpu;
475 * the viewer will sort out if the task
476 * was really running at this time.
477 */
478 status = LTTNG_WAIT_CPU;
479 } else if (p->state &
480 (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) {
481 /* Task is waiting for something to complete */
482 status = LTTNG_WAIT;
483 } else
484 status = LTTNG_UNNAMED;
485 submode = LTTNG_NONE;
486
487 /*
488 * Verification of t->mm is to filter out kernel
489 * threads; Viewer will further filter out if a
490 * user-space thread was in syscall mode or not.
491 */
492 if (p->mm)
493 type = LTTNG_USER_THREAD;
494 else
495 type = LTTNG_KERNEL_THREAD;
496 files = p->files;
497
498 trace_lttng_statedump_process_state(session,
499 p, type, mode, submode, status, files);
500 lttng_statedump_process_ns(session,
501 p, type, mode, submode, status);
502 /*
503 * As an optimisation for the common case, do not
504 * repeat information for the same files_struct in
505 * two consecutive threads. This is the common case
506 * for threads sharing the same fd table. RCU guarantees
507 * that the same files_struct pointer is not re-used
508 * throughout processes/threads iteration.
509 */
510 if (files && files != prev_files) {
511 lttng_enumerate_files(session, files, tmp);
512 prev_files = files;
513 }
514 task_unlock(p);
515 } while_each_thread(g, p);
516 }
517 rcu_read_unlock();
518
519 free_page((unsigned long) tmp);
520
521 return 0;
522 }
523
524 static
525 void lttng_statedump_work_func(struct work_struct *work)
526 {
527 if (atomic_dec_and_test(&kernel_threads_to_run))
528 /* If we are the last thread, wake up do_lttng_statedump */
529 wake_up(&statedump_wq);
530 }
531
532 static
533 int do_lttng_statedump(struct lttng_session *session)
534 {
535 int cpu, ret;
536
537 trace_lttng_statedump_start(session);
538 ret = lttng_enumerate_process_states(session);
539 if (ret)
540 return ret;
541 /*
542 * FIXME
543 * ret = lttng_enumerate_vm_maps(session);
544 * if (ret)
545 * return ret;
546 */
547 ret = lttng_list_interrupts(session);
548 if (ret)
549 return ret;
550 ret = lttng_enumerate_network_ip_interface(session);
551 if (ret)
552 return ret;
553 ret = lttng_enumerate_block_devices(session);
554 switch (ret) {
555 case 0:
556 break;
557 case -ENOSYS:
558 printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n");
559 break;
560 default:
561 return ret;
562 }
563 ret = lttng_enumerate_cpu_topology(session);
564 if (ret)
565 return ret;
566
567 /* TODO lttng_dump_idt_table(session); */
568 /* TODO lttng_dump_softirq_vec(session); */
569 /* TODO lttng_list_modules(session); */
570 /* TODO lttng_dump_swap_files(session); */
571
572 /*
573 * Fire off a work queue on each CPU. Their sole purpose in life
574 * is to guarantee that each CPU has been in a state where is was in
575 * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ).
576 */
577 get_online_cpus();
578 atomic_set(&kernel_threads_to_run, num_online_cpus());
579 for_each_online_cpu(cpu) {
580 INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func);
581 schedule_delayed_work_on(cpu, &cpu_work[cpu], 0);
582 }
583 /* Wait for all threads to run */
584 __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0));
585 put_online_cpus();
586 /* Our work is done */
587 trace_lttng_statedump_end(session);
588 return 0;
589 }
590
591 /*
592 * Called with session mutex held.
593 */
594 int lttng_statedump_start(struct lttng_session *session)
595 {
596 return do_lttng_statedump(session);
597 }
598 EXPORT_SYMBOL_GPL(lttng_statedump_start);
599
600 static
601 int __init lttng_statedump_init(void)
602 {
603 return 0;
604 }
605
606 module_init(lttng_statedump_init);
607
608 static
609 void __exit lttng_statedump_exit(void)
610 {
611 }
612
613 module_exit(lttng_statedump_exit);
614
615 MODULE_LICENSE("GPL and additional rights");
616 MODULE_AUTHOR("Jean-Hugues Deschenes");
617 MODULE_DESCRIPTION("LTTng statedump provider");
618 MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "."
619 __stringify(LTTNG_MODULES_MINOR_VERSION) "."
620 __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION)
621 LTTNG_MODULES_EXTRAVERSION);
/* This page took 0.04416 seconds and 4 git commands to generate. */