0f34ff15700782428349b9c2a638d16150811666
[lttng-modules.git] / instrumentation / events / lttng-module / sched.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #undef TRACE_SYSTEM
3 #define TRACE_SYSTEM sched
4
5 #if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define LTTNG_TRACE_SCHED_H
7
8 #include <probes/lttng-tracepoint-event.h>
9 #include <linux/sched.h>
10 #include <linux/pid_namespace.h>
11 #include <linux/binfmts.h>
12 #include <linux/version.h>
13 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
14 #include <linux/sched/rt.h>
15 #endif
16 #include <wrapper/namespace.h>
17
18 #define LTTNG_MAX_PID_NS_LEVEL 32
19
20 #ifndef _TRACE_SCHED_DEF_
21 #define _TRACE_SCHED_DEF_
22
23 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0))
24
25 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
26 {
27 unsigned int state;
28
29 #ifdef CONFIG_SCHED_DEBUG
30 BUG_ON(p != current);
31 #endif /* CONFIG_SCHED_DEBUG */
32
33 /*
34 * Preemption ignores task state, therefore preempted tasks are always
35 * RUNNING (we will not have dequeued if state != RUNNING).
36 */
37 if (preempt)
38 return TASK_REPORT_MAX;
39
40 /*
41 * task_state_index() uses fls() and returns a value from 0-8 range.
42 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
43 * it for left shift operation to get the correct task->state
44 * mapping.
45 */
46 state = task_state_index(p);
47
48 return state ? (1 << (state - 1)) : state;
49 }
50
51 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0))
52
53 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
54 {
55 unsigned int state;
56
57 #ifdef CONFIG_SCHED_DEBUG
58 BUG_ON(p != current);
59 #endif /* CONFIG_SCHED_DEBUG */
60
61 /*
62 * Preemption ignores task state, therefore preempted tasks are always
63 * RUNNING (we will not have dequeued if state != RUNNING).
64 */
65 if (preempt)
66 return TASK_REPORT_MAX;
67
68 /*
69 * __get_task_state() uses fls() and returns a value from 0-8 range.
70 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
71 * it for left shift operation to get the correct task->state
72 * mapping.
73 */
74 state = __get_task_state(p);
75
76 return state ? (1 << (state - 1)) : state;
77 }
78
79 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
80
81 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
82 {
83 #ifdef CONFIG_SCHED_DEBUG
84 BUG_ON(p != current);
85 #endif /* CONFIG_SCHED_DEBUG */
86 /*
87 * Preemption ignores task state, therefore preempted tasks are always RUNNING
88 * (we will not have dequeued if state != RUNNING).
89 */
90 return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
91 }
92
93 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
94
95 static inline long __trace_sched_switch_state(struct task_struct *p)
96 {
97 long state = p->state;
98
99 #ifdef CONFIG_PREEMPT
100 #ifdef CONFIG_SCHED_DEBUG
101 BUG_ON(p != current);
102 #endif /* CONFIG_SCHED_DEBUG */
103 /*
104 * For all intents and purposes a preempted task is a running task.
105 */
106 if (preempt_count() & PREEMPT_ACTIVE)
107 state = TASK_RUNNING | TASK_STATE_MAX;
108 #endif /* CONFIG_PREEMPT */
109
110 return state;
111 }
112
113 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0))
114
115 static inline long __trace_sched_switch_state(struct task_struct *p)
116 {
117 long state = p->state;
118
119 #ifdef CONFIG_PREEMPT
120 /*
121 * For all intents and purposes a preempted task is a running task.
122 */
123 if (task_preempt_count(p) & PREEMPT_ACTIVE)
124 state = TASK_RUNNING | TASK_STATE_MAX;
125 #endif
126
127 return state;
128 }
129
130 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))
131
132 static inline long __trace_sched_switch_state(struct task_struct *p)
133 {
134 long state = p->state;
135
136 #ifdef CONFIG_PREEMPT
137 /*
138 * For all intents and purposes a preempted task is a running task.
139 */
140 if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
141 state = TASK_RUNNING | TASK_STATE_MAX;
142 #endif
143
144 return state;
145 }
146
147 #else
148
149 static inline long __trace_sched_switch_state(struct task_struct *p)
150 {
151 long state = p->state;
152
153 #ifdef CONFIG_PREEMPT
154 /*
155 * For all intents and purposes a preempted task is a running task.
156 */
157 if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
158 state = TASK_RUNNING;
159 #endif
160
161 return state;
162 }
163
164 #endif
165
166 #endif /* _TRACE_SCHED_DEF_ */
167
168 /*
169 * Tracepoint for calling kthread_stop, performed to end a kthread:
170 */
171 LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,
172
173 TP_PROTO(struct task_struct *t),
174
175 TP_ARGS(t),
176
177 TP_FIELDS(
178 ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
179 ctf_integer(pid_t, tid, t->pid)
180 )
181 )
182
183 /*
184 * Tracepoint for the return value of the kthread stopping:
185 */
186 LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,
187
188 TP_PROTO(int ret),
189
190 TP_ARGS(ret),
191
192 TP_FIELDS(
193 ctf_integer(int, ret, ret)
194 )
195 )
196
197 /*
198 * Tracepoint for waking up a task:
199 */
200 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
201 LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
202 LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
203 LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
204 LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
205 LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
206 LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
207 LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
208 LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
209
210 TP_PROTO(struct task_struct *p),
211
212 TP_ARGS(p),
213
214 TP_FIELDS(
215 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
216 ctf_integer(pid_t, tid, p->pid)
217 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
218 ctf_integer(int, target_cpu, task_cpu(p))
219 )
220 )
221 #else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
222 LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
223
224 TP_PROTO(struct task_struct *p, int success),
225
226 TP_ARGS(p, success),
227
228 TP_FIELDS(
229 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
230 ctf_integer(pid_t, tid, p->pid)
231 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
232 ctf_integer(int, success, success)
233 ctf_integer(int, target_cpu, task_cpu(p))
234 )
235 )
236 #endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
237
238 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
239 LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
240 LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
241 LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
242 LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
243 LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
244 LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
245 LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
246
247 /*
248 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
249 * called from the waking context.
250 */
251 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
252 TP_PROTO(struct task_struct *p),
253 TP_ARGS(p))
254
255 /*
256 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG.
257 * It it not always called from the waking context.
258 */
259 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
260 TP_PROTO(struct task_struct *p),
261 TP_ARGS(p))
262
263 /*
264 * Tracepoint for waking up a new task:
265 */
266 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
267 TP_PROTO(struct task_struct *p),
268 TP_ARGS(p))
269
270 #else
271
272 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
273 TP_PROTO(struct task_struct *p, int success),
274 TP_ARGS(p, success))
275
276 /*
277 * Tracepoint for waking up a new task:
278 */
279 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
280 TP_PROTO(struct task_struct *p, int success),
281 TP_ARGS(p, success))
282
283 #endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
284
285 /*
286 * Tracepoint for task switches, performed by the scheduler:
287 */
288 LTTNG_TRACEPOINT_EVENT(sched_switch,
289
290 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
291 TP_PROTO(bool preempt,
292 struct task_struct *prev,
293 struct task_struct *next),
294
295 TP_ARGS(preempt, prev, next),
296 #else
297 TP_PROTO(struct task_struct *prev,
298 struct task_struct *next),
299
300 TP_ARGS(prev, next),
301 #endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)) */
302
303 TP_FIELDS(
304 ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
305 ctf_integer(pid_t, prev_tid, prev->pid)
306 ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
307 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
308 ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
309 #else
310 ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
311 #endif
312 ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
313 ctf_integer(pid_t, next_tid, next->pid)
314 ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
315 )
316 )
317
318 /*
319 * Tracepoint for a task being migrated:
320 */
321 LTTNG_TRACEPOINT_EVENT(sched_migrate_task,
322
323 TP_PROTO(struct task_struct *p, int dest_cpu),
324
325 TP_ARGS(p, dest_cpu),
326
327 TP_FIELDS(
328 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
329 ctf_integer(pid_t, tid, p->pid)
330 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
331 ctf_integer(int, orig_cpu, task_cpu(p))
332 ctf_integer(int, dest_cpu, dest_cpu)
333 )
334 )
335
336 LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,
337
338 TP_PROTO(struct task_struct *p),
339
340 TP_ARGS(p),
341
342 TP_FIELDS(
343 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
344 ctf_integer(pid_t, tid, p->pid)
345 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
346 )
347 )
348
349 /*
350 * Tracepoint for freeing a task:
351 */
352 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
353 TP_PROTO(struct task_struct *p),
354 TP_ARGS(p))
355
356
357 /*
358 * Tracepoint for a task exiting:
359 */
360 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
361 TP_PROTO(struct task_struct *p),
362 TP_ARGS(p))
363
364 /*
365 * Tracepoint for waiting on task to unschedule:
366 */
367 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
368 TP_PROTO(struct task_struct *p),
369 TP_ARGS(p))
370
371 /*
372 * Tracepoint for a waiting task:
373 */
374 LTTNG_TRACEPOINT_EVENT(sched_process_wait,
375
376 TP_PROTO(struct pid *pid),
377
378 TP_ARGS(pid),
379
380 TP_FIELDS(
381 ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
382 ctf_integer(pid_t, tid, pid_nr(pid))
383 ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
384 )
385 )
386
387 /*
388 * Tracepoint for do_fork.
389 * Saving both TID and PID information, especially for the child, allows
390 * trace analyzers to distinguish between creation of a new process and
391 * creation of a new thread. Newly created processes will have child_tid
392 * == child_pid, while creation of a thread yields to child_tid !=
393 * child_pid.
394 */
395 LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,
396
397 TP_PROTO(struct task_struct *parent, struct task_struct *child),
398
399 TP_ARGS(parent, child),
400
401 TP_locvar(
402 pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
403 unsigned int ns_level;
404 ),
405
406 TP_code_pre(
407 if (child) {
408 struct pid *child_pid;
409 unsigned int i;
410
411 child_pid = task_pid(child);
412 tp_locvar->ns_level =
413 min_t(unsigned int, child_pid->level + 1,
414 LTTNG_MAX_PID_NS_LEVEL);
415 for (i = 0; i < tp_locvar->ns_level; i++)
416 tp_locvar->vtids[i] = child_pid->numbers[i].nr;
417 }
418 ),
419
420 TP_FIELDS(
421 ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
422 ctf_integer(pid_t, parent_tid, parent->pid)
423 ctf_integer(pid_t, parent_pid, parent->tgid)
424 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
425 ctf_integer(unsigned int, parent_ns_inum,
426 ({
427 unsigned int parent_ns_inum = 0;
428
429 if (parent) {
430 struct pid_namespace *pid_ns;
431
432 pid_ns = task_active_pid_ns(parent);
433 if (pid_ns)
434 parent_ns_inum =
435 pid_ns->lttng_ns_inum;
436 }
437 parent_ns_inum;
438 }))
439 #endif
440 ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
441 ctf_integer(pid_t, child_tid, child->pid)
442 ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
443 ctf_integer(pid_t, child_pid, child->tgid)
444 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
445 ctf_integer(unsigned int, child_ns_inum,
446 ({
447 unsigned int child_ns_inum = 0;
448
449 if (child) {
450 struct pid_namespace *pid_ns;
451
452 pid_ns = task_active_pid_ns(child);
453 if (pid_ns)
454 child_ns_inum =
455 pid_ns->lttng_ns_inum;
456 }
457 child_ns_inum;
458 }))
459 #endif
460 ),
461
462 TP_code_post()
463 )
464
465 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
466 /*
467 * Tracepoint for exec:
468 */
469 LTTNG_TRACEPOINT_EVENT(sched_process_exec,
470
471 TP_PROTO(struct task_struct *p, pid_t old_pid,
472 struct linux_binprm *bprm),
473
474 TP_ARGS(p, old_pid, bprm),
475
476 TP_FIELDS(
477 ctf_string(filename, bprm->filename)
478 ctf_integer(pid_t, tid, p->pid)
479 ctf_integer(pid_t, old_tid, old_pid)
480 )
481 )
482 #endif
483
484 /*
485 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
486 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
487 */
488 LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,
489
490 TP_PROTO(struct task_struct *tsk, u64 delay),
491
492 TP_ARGS(tsk, delay),
493
494 TP_FIELDS(
495 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
496 ctf_integer(pid_t, tid, tsk->pid)
497 ctf_integer(u64, delay, delay)
498 )
499 )
500
501
502 /*
503 * Tracepoint for accounting wait time (time the task is runnable
504 * but not actually running due to scheduler contention).
505 */
506 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
507 TP_PROTO(struct task_struct *tsk, u64 delay),
508 TP_ARGS(tsk, delay))
509
510 /*
511 * Tracepoint for accounting sleep time (time the task is not runnable,
512 * including iowait, see below).
513 */
514 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
515 TP_PROTO(struct task_struct *tsk, u64 delay),
516 TP_ARGS(tsk, delay))
517
518 /*
519 * Tracepoint for accounting iowait time (time the task is not runnable
520 * due to waiting on IO to complete).
521 */
522 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
523 TP_PROTO(struct task_struct *tsk, u64 delay),
524 TP_ARGS(tsk, delay))
525
526 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0))
527 /*
528 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
529 */
530 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
531 TP_PROTO(struct task_struct *tsk, u64 delay),
532 TP_ARGS(tsk, delay))
533 #endif
534
535 /*
536 * Tracepoint for accounting runtime (time the task is executing
537 * on a CPU).
538 */
539 LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,
540
541 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
542
543 TP_ARGS(tsk, runtime, vruntime),
544
545 TP_FIELDS(
546 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
547 ctf_integer(pid_t, tid, tsk->pid)
548 ctf_integer(u64, runtime, runtime)
549 ctf_integer(u64, vruntime, vruntime)
550 )
551 )
552
553 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
554 LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
555 LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
556 /*
557 * Tracepoint for showing priority inheritance modifying a tasks
558 * priority.
559 */
560 LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
561
562 TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
563
564 TP_ARGS(tsk, pi_task),
565
566 TP_FIELDS(
567 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
568 ctf_integer(pid_t, tid, tsk->pid)
569 ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
570 ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
571 )
572 )
573 #else
574 /*
575 * Tracepoint for showing priority inheritance modifying a tasks
576 * priority.
577 */
578 LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
579
580 TP_PROTO(struct task_struct *tsk, int newprio),
581
582 TP_ARGS(tsk, newprio),
583
584 TP_FIELDS(
585 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
586 ctf_integer(pid_t, tid, tsk->pid)
587 ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
588 ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
589 )
590 )
591 #endif
592
593 #endif /* LTTNG_TRACE_SCHED_H */
594
595 /* This part must be outside protection */
596 #include <probes/define_trace.h>
This page took 0.040308 seconds and 3 git commands to generate.