Fix: update sched wakeup instrumentation for 4.3 kernel
[lttng-modules.git] / instrumentation / events / lttng-module / sched.h
index 005f3d141e1f7bca6d6ccfe83dd29d94986c6057..f6b65ae2e98ad36651588accabe3504b1c8241af 100644 (file)
@@ -1,15 +1,76 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
 
-#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SCHED_H
+#if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define LTTNG_TRACE_SCHED_H
 
+#include "../../../probes/lttng-tracepoint-event.h"
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
+#include <linux/binfmts.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
+#include <linux/sched/rt.h>
+#endif
 
 #ifndef _TRACE_SCHED_DEF_
 #define _TRACE_SCHED_DEF_
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
+
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+       BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (preempt_count() & PREEMPT_ACTIVE)
+               state = TASK_RUNNING | TASK_STATE_MAX;
+#endif /* CONFIG_PREEMPT */
+
+       return state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0))
+
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (task_preempt_count(p) & PREEMPT_ACTIVE)
+               state = TASK_RUNNING | TASK_STATE_MAX;
+#endif
+
+       return state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))
+
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+               state = TASK_RUNNING | TASK_STATE_MAX;
+#endif
+
+       return state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
+
 static inline long __trace_sched_switch_state(struct task_struct *p)
 {
        long state = p->state;
@@ -25,12 +86,14 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
        return state;
 }
 
+#endif
+
 #endif /* _TRACE_SCHED_DEF_ */
 
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
-TRACE_EVENT(sched_kthread_stop,
+LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,
 
        TP_PROTO(struct task_struct *t),
 
@@ -52,7 +115,7 @@ TRACE_EVENT(sched_kthread_stop,
 /*
  * Tracepoint for the return value of the kthread stopping:
  */
-TRACE_EVENT(sched_kthread_stop_ret,
+LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,
 
        TP_PROTO(int ret),
 
@@ -72,7 +135,22 @@ TRACE_EVENT(sched_kthread_stop_ret,
 /*
  * Tracepoint for waking up a task:
  */
-DECLARE_EVENT_CLASS(sched_wakeup_template,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0))
+LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_FIELDS(
+               ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
+               ctf_integer(pid_t, tid, p->pid)
+               ctf_integer(int, prio, p->prio)
+               ctf_integer(int, target_cpu, task_cpu(p))
+       )
+)
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
+LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
        TP_PROTO(struct task_struct *p, int success),
@@ -89,7 +167,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
                __field(        pid_t,  tid                     )
                __field(        int,    prio                    )
                __field(        int,    success                 )
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
                __field(        int,    target_cpu              )
+#endif
        ),
 
        TP_fast_assign(
@@ -97,37 +177,76 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
                tp_assign(tid, p->pid)
                tp_assign(prio, p->prio)
                tp_assign(success, success)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
                tp_assign(target_cpu, task_cpu(p))
+#endif
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0))
+       )
+       TP_perf_assign(
+               __perf_task(p)
+#endif
        ),
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
        TP_printk("comm=%s tid=%d prio=%d success=%d target_cpu=%03d",
                  __entry->comm, __entry->tid, __entry->prio,
                  __entry->success, __entry->target_cpu)
+#else
+       TP_printk("comm=%s tid=%d prio=%d success=%d",
+                 __entry->comm, __entry->tid, __entry->prio,
+                 __entry->success)
+#endif
 )
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0))
+
+/*
+ * Tracepoint called when waking a task; this tracepoint is guaranteed to be
+ * called from the waking context.
+ */
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+
+/*
+ * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
+ * It is not always called from the waking context.
+ */
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+
+/*
+ * Tracepoint for waking up a new task:
+ */
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
 
-DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
+
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
             TP_PROTO(struct task_struct *p, int success),
             TP_ARGS(p, success))
 
 /*
  * Tracepoint for waking up a new task:
  */
-DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
             TP_PROTO(struct task_struct *p, int success),
             TP_ARGS(p, success))
 
 #else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
 
-DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
             TP_PROTO(struct rq *rq, struct task_struct *p, int success),
             TP_ARGS(rq, p, success))
 
 /*
  * Tracepoint for waking up a new task:
  */
-DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
             TP_PROTO(struct rq *rq, struct task_struct *p, int success),
             TP_ARGS(rq, p, success))
 
@@ -136,7 +255,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
 /*
  * Tracepoint for task switches, performed by the scheduler:
  */
-TRACE_EVENT(sched_switch,
+LTTNG_TRACEPOINT_EVENT(sched_switch,
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
        TP_PROTO(struct task_struct *prev,
@@ -164,12 +283,27 @@ TRACE_EVENT(sched_switch,
                tp_memcpy(next_comm, next->comm, TASK_COMM_LEN)
                tp_assign(prev_tid, prev->pid)
                tp_assign(prev_prio, prev->prio - MAX_RT_PRIO)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
                tp_assign(prev_state, __trace_sched_switch_state(prev))
+#else
+               tp_assign(prev_state, prev->state)
+#endif
                tp_memcpy(prev_comm, prev->comm, TASK_COMM_LEN)
                tp_assign(next_tid, next->pid)
                tp_assign(next_prio, next->prio - MAX_RT_PRIO)
        ),
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))
+       TP_printk("prev_comm=%s prev_tid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_tid=%d next_prio=%d",
+               __entry->prev_comm, __entry->prev_tid, __entry->prev_prio,
+               __entry->prev_state & (TASK_STATE_MAX-1) ?
+                 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "W" }) : "R",
+               __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+               __entry->next_comm, __entry->next_tid, __entry->next_prio)
+#else
        TP_printk("prev_comm=%s prev_tid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_tid=%d next_prio=%d",
                __entry->prev_comm, __entry->prev_tid, __entry->prev_prio,
                __entry->prev_state ?
@@ -178,12 +312,13 @@ TRACE_EVENT(sched_switch,
                                { 16, "Z" }, { 32, "X" }, { 64, "x" },
                                { 128, "W" }) : "R",
                __entry->next_comm, __entry->next_tid, __entry->next_prio)
+#endif
 )
 
 /*
  * Tracepoint for a task being migrated:
  */
-TRACE_EVENT(sched_migrate_task,
+LTTNG_TRACEPOINT_EVENT(sched_migrate_task,
 
        TP_PROTO(struct task_struct *p, int dest_cpu),
 
@@ -210,7 +345,7 @@ TRACE_EVENT(sched_migrate_task,
                  __entry->orig_cpu, __entry->dest_cpu)
 )
 
-DECLARE_EVENT_CLASS(sched_process_template,
+LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,
 
        TP_PROTO(struct task_struct *p),
 
@@ -235,7 +370,7 @@ DECLARE_EVENT_CLASS(sched_process_template,
 /*
  * Tracepoint for freeing a task:
  */
-DEFINE_EVENT(sched_process_template, sched_process_free,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
             TP_PROTO(struct task_struct *p),
             TP_ARGS(p))
             
@@ -243,7 +378,7 @@ DEFINE_EVENT(sched_process_template, sched_process_free,
 /*
  * Tracepoint for a task exiting:
  */
-DEFINE_EVENT(sched_process_template, sched_process_exit,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
             TP_PROTO(struct task_struct *p),
             TP_ARGS(p))
 
@@ -251,11 +386,11 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
  * Tracepoint for waiting on task to unschedule:
  */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
-DEFINE_EVENT(sched_process_template, sched_wait_task,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
        TP_PROTO(struct task_struct *p),
        TP_ARGS(p))
 #else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
-DEFINE_EVENT(sched_process_template, sched_wait_task,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
        TP_PROTO(struct rq *rq, struct task_struct *p),
        TP_ARGS(rq, p))
 #endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
@@ -263,7 +398,7 @@ DEFINE_EVENT(sched_process_template, sched_wait_task,
 /*
  * Tracepoint for a waiting task:
  */
-TRACE_EVENT(sched_process_wait,
+LTTNG_TRACEPOINT_EVENT(sched_process_wait,
 
        TP_PROTO(struct pid *pid),
 
@@ -286,9 +421,14 @@ TRACE_EVENT(sched_process_wait,
 )
 
 /*
- * Tracepoint for do_fork:
+ * Tracepoint for do_fork.
+ * Saving both TID and PID information, especially for the child, allows
+ * trace analyzers to distinguish between creation of a new process and
+ * creation of a new thread. Newly created processes will have child_tid
+ * == child_pid, while creation of a thread yields child_tid !=
+ * child_pid.
  */
-TRACE_EVENT(sched_process_fork,
+LTTNG_TRACEPOINT_EVENT(sched_process_fork,
 
        TP_PROTO(struct task_struct *parent, struct task_struct *child),
 
@@ -297,15 +437,19 @@ TRACE_EVENT(sched_process_fork,
        TP_STRUCT__entry(
                __array_text(   char,   parent_comm,    TASK_COMM_LEN   )
                __field(        pid_t,  parent_tid                      )
+               __field(        pid_t,  parent_pid                      )
                __array_text(   char,   child_comm,     TASK_COMM_LEN   )
                __field(        pid_t,  child_tid                       )
+               __field(        pid_t,  child_pid                       )
        ),
 
        TP_fast_assign(
                tp_memcpy(parent_comm, parent->comm, TASK_COMM_LEN)
                tp_assign(parent_tid, parent->pid)
+               tp_assign(parent_pid, parent->tgid)
                tp_memcpy(child_comm, child->comm, TASK_COMM_LEN)
                tp_assign(child_tid, child->pid)
+               tp_assign(child_pid, child->tgid)
        ),
 
        TP_printk("comm=%s tid=%d child_comm=%s child_tid=%d",
@@ -313,11 +457,38 @@ TRACE_EVENT(sched_process_fork,
                __entry->child_comm, __entry->child_tid)
 )
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33))
+/*
+ * Tracepoint for sending a signal:
+ */
+LTTNG_TRACEPOINT_EVENT(sched_signal_send,
+
+       TP_PROTO(int sig, struct task_struct *p),
+
+       TP_ARGS(sig, p),
+
+       TP_STRUCT__entry(
+               __field(        int,    sig                     )
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(sig, sig)
+       ),
+
+       TP_printk("sig=%d comm=%s pid=%d",
+               __entry->sig, __entry->comm, __entry->pid)
+)
+#endif
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
 /*
  * Tracepoint for exec:
  */
-TRACE_EVENT(sched_process_exec,
+LTTNG_TRACEPOINT_EVENT(sched_process_exec,
 
        TP_PROTO(struct task_struct *p, pid_t old_pid,
                 struct linux_binprm *bprm),
@@ -341,11 +512,12 @@ TRACE_EVENT(sched_process_exec,
 )
 #endif
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-DECLARE_EVENT_CLASS(sched_stat_template,
+LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,
 
        TP_PROTO(struct task_struct *tsk, u64 delay),
 
@@ -376,7 +548,7 @@ DECLARE_EVENT_CLASS(sched_stat_template,
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
             TP_PROTO(struct task_struct *tsk, u64 delay),
             TP_ARGS(tsk, delay))
 
@@ -384,7 +556,7 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait,
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
             TP_PROTO(struct task_struct *tsk, u64 delay),
             TP_ARGS(tsk, delay))
 
@@ -392,15 +564,24 @@ DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0))
+/*
+ * Tracepoint for accounting blocked time (time the task is in uninterruptible sleep).
+ */
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
             TP_PROTO(struct task_struct *tsk, u64 delay),
             TP_ARGS(tsk, delay))
+#endif
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
-TRACE_EVENT(sched_stat_runtime,
+LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,
 
        TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
 
@@ -421,6 +602,9 @@ TRACE_EVENT(sched_stat_runtime,
        )
        TP_perf_assign(
                __perf_count(runtime)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0))
+               __perf_task(tsk)
+#endif
        ),
 
        TP_printk("comm=%s tid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
@@ -428,12 +612,14 @@ TRACE_EVENT(sched_stat_runtime,
                        (unsigned long long)__entry->runtime,
                        (unsigned long long)__entry->vruntime)
 )
+#endif
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37))
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
  */
-TRACE_EVENT(sched_pi_setprio,
+LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
 
        TP_PROTO(struct task_struct *tsk, int newprio),
 
@@ -457,8 +643,9 @@ TRACE_EVENT(sched_pi_setprio,
                        __entry->comm, __entry->tid,
                        __entry->oldprio, __entry->newprio)
 )
+#endif
 
-#endif /* _TRACE_SCHED_H */
+#endif /* LTTNG_TRACE_SCHED_H */
 
 /* This part must be outside protection */
 #include "../../../probes/define_trace.h"
This page took 0.028838 seconds and 4 git commands to generate.