Fix: sched_stat_runtime changed in linux 6.8.0-rc1
[lttng-modules.git] include/instrumentation/events/sched.h
/* SPDX-License-Identifier: GPL-2.0-only */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define LTTNG_TRACE_SCHED_H

#include <lttng/tracepoint-event.h>
#include <linux/sched.h>
#include <linux/pid_namespace.h>
#include <linux/binfmts.h>
#include <lttng/kernel-version.h>
#include <linux/sched/rt.h>

#define LTTNG_MAX_PID_NS_LEVEL 32
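/*
 * Cap on the pid namespace nesting depth recorded in sched_process_fork's
 * vtids[] field below; this matches the kernel's own pid namespace
 * nesting limit (MAX_PID_NS_LEVEL is 32).
 */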

#ifndef _TRACE_SCHED_DEF_
#define _TRACE_SCHED_DEF_

#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
	|| LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
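/*
 * Since v5.18 (and the RHEL kernel range covered above), the kernel
 * passes the task's previous state to the sched_switch tracepoint
 * instead of having probes re-read p->state, hence the extra prev_state
 * argument taken here and by the sched_switch event below.
 */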

static inline long __trace_sched_switch_state(bool preempt,
		unsigned int prev_state,
		struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = __task_state_index(prev_state, p->exit_state);

	return state ? (1 << (state - 1)) : state;
}
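
/*
 * Example of the mapping above: prev_state == TASK_UNINTERRUPTIBLE (0x2)
 * gives fls(0x2) == 2, and 1 << (2 - 1) == 0x2 recovers the original
 * state bit, while TASK_RUNNING (0) is passed through unchanged.
 */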

#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = task_state_index(p);

	return state ? (1 << (state - 1)) : state;
}

#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,14,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * __get_task_state() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = __get_task_state(p);

	return state ? (1 << (state - 1)) : state;
}

#else

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * Preemption ignores task state, therefore preempted tasks are always RUNNING
	 * (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
#endif

#endif /* _TRACE_SCHED_DEF_ */

#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
/*
 * Enumeration of the task state bitmask.
 * Only bit flags are enumerated here, not composition of states.
 */
LTTNG_TRACEPOINT_ENUM(task_state,
	TP_ENUM_VALUES(
		ctf_enum_value("TASK_RUNNING", TASK_RUNNING)
		ctf_enum_value("TASK_INTERRUPTIBLE", TASK_INTERRUPTIBLE)
		ctf_enum_value("TASK_UNINTERRUPTIBLE", TASK_UNINTERRUPTIBLE)
		ctf_enum_value("TASK_STOPPED", __TASK_STOPPED)
		ctf_enum_value("TASK_TRACED", __TASK_TRACED)
		ctf_enum_value("EXIT_DEAD", EXIT_DEAD)
		ctf_enum_value("EXIT_ZOMBIE", EXIT_ZOMBIE)
		ctf_enum_value("TASK_PARKED", TASK_PARKED)
		ctf_enum_value("TASK_DEAD", TASK_DEAD)
		ctf_enum_value("TASK_WAKEKILL", TASK_WAKEKILL)
		ctf_enum_value("TASK_WAKING", TASK_WAKING)
		ctf_enum_value("TASK_NOLOAD", TASK_NOLOAD)

#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0))
		ctf_enum_value("TASK_NEW", TASK_NEW)
#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0)) */

		ctf_enum_value("TASK_STATE_MAX", TASK_STATE_MAX)
	)
)
#endif /* CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM */

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_FIELDS(
		ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, t->pid)
	)
)

/*
 * Tracepoint for the return value of the kthread stopping:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_FIELDS(
		ctf_integer(int, ret, ret)
	)
)

/*
 * Tracepoint for waking up a task:
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, target_cpu, task_cpu(p))
	)
)
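
/*
 * Note: prio fields in this file are recorded relative to MAX_RT_PRIO,
 * matching the upstream sched tracepoints; e.g. a normal task at nice 0
 * (kernel prio 120) is recorded as prio 20.
 */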

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for task switches, performed by the scheduler:
 */

#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
	|| LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
LTTNG_TRACEPOINT_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next,
		 unsigned int prev_state),

	TP_ARGS(preempt, prev, next, prev_state),

	TP_FIELDS(
		ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, prev_tid, prev->pid)
		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
		ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
#else
		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
#endif
		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, next_tid, next->pid)
		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
	)
)

#else

LTTNG_TRACEPOINT_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_FIELDS(
		ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, prev_tid, prev->pid)
		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
		ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev))
#else
		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
#endif
		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, next_tid, next->pid)
		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
	)
)
#endif

/*
 * Tracepoint for a task being migrated:
 */
LTTNG_TRACEPOINT_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, orig_cpu, task_cpu(p))
		ctf_integer(int, dest_cpu, dest_cpu)
	)
)

LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for freeing a task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))


/*
 * Tracepoint for a task exiting:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waiting on task to unschedule:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for a waiting task:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_FIELDS(
		ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, pid_nr(pid))
		ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for do_fork.
 * Saving both TID and PID information, especially for the child, allows
 * trace analyzers to distinguish between the creation of a new process
 * and the creation of a new thread. Newly created processes have
 * child_tid == child_pid, while creating a thread yields
 * child_tid != child_pid.
 */
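/*
 * For example, fork() from a single-threaded process creates a task with
 * child_tid == child_pid, whereas pthread_create() creates a thread whose
 * TID differs from the shared process PID, so child_tid != child_pid.
 */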
LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_locvar(
		pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
		unsigned int ns_level;
	),

	TP_code_pre(
		if (child) {
			struct pid *child_pid;
			unsigned int i;

			child_pid = task_pid(child);
			tp_locvar->ns_level =
				min_t(unsigned int, child_pid->level + 1,
					LTTNG_MAX_PID_NS_LEVEL);
			for (i = 0; i < tp_locvar->ns_level; i++)
				tp_locvar->vtids[i] = child_pid->numbers[i].nr;
		}
	),

	TP_FIELDS(
		ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, parent_tid, parent->pid)
		ctf_integer(pid_t, parent_pid, parent->tgid)
		ctf_integer(unsigned int, parent_ns_inum,
			({
				unsigned int parent_ns_inum = 0;

				if (parent) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(parent);
					if (pid_ns)
						parent_ns_inum =
							pid_ns->ns.inum;
				}
				parent_ns_inum;
			}))
		ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, child_tid, child->pid)
		ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
		ctf_integer(pid_t, child_pid, child->tgid)
		ctf_integer(unsigned int, child_ns_inum,
			({
				unsigned int child_ns_inum = 0;

				if (child) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(child);
					if (pid_ns)
						child_ns_inum =
							pid_ns->ns.inum;
				}
				child_ns_inum;
			}))
	),

	TP_code_post()
)
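
/*
 * Note on vtids above: entry 0 is the child's pid as seen from the
 * initial pid namespace, and each subsequent entry is its pid one
 * namespace level deeper, down to the child's own pid namespace.
 */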

/*
 * Tracepoint for exec:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_FIELDS(
		ctf_string(filename, bprm->filename)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(pid_t, old_tid, old_pid)
	)
)

/*
 * XXX the sched_stat tracepoints below only apply to SCHED_OTHER/BATCH/IDLE;
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(tsk, delay),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, delay, delay)
	)
)


/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting blocked time (time the task is in
 * uninterruptible sleep).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,8,0))
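/*
 * As the commit subject above notes, Linux 6.8.0-rc1 changed the
 * sched_stat_runtime tracepoint: upstream dropped the vruntime argument
 * (in the wake of the EEVDF scheduler rework), hence the two event
 * variants below.
 */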
/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime),

	TP_ARGS(tsk, runtime),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, runtime, runtime)
	)
)
#else
/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, runtime, vruntime),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, runtime, runtime)
		ctf_integer(u64, vruntime, vruntime)
	)
)
#endif
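
/*
 * A minimal usage sketch with the LTTng CLI (assumes lttng-tools and a
 * loaded lttng-modules; the session name is arbitrary):
 *
 *   lttng create sched-demo
 *   lttng enable-event --kernel sched_stat_runtime
 *   lttng start
 *   ... run the workload ...
 *   lttng stop
 *   lttng view
 *
 * Note: the sched_stat_wait/sleep/iowait/blocked events above are only
 * emitted when schedstats are enabled (e.g. sysctl
 * kernel.sched_schedstats=1) on kernels that gate them behind
 * schedstat_enabled(); sched_stat_runtime itself is not gated.
 */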

#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,12,0) || \
	LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
	LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
	)
)
#else
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, int newprio),

	TP_ARGS(tsk, newprio),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
	)
)
#endif

#endif /* LTTNG_TRACE_SCHED_H */

/* This part must be outside protection */
#include <lttng/define_trace.h>