Fix: tests: health thread stall: only stop consumerd when required
[lttng-tools.git] / tests / regression / tools / health / test_health.sh
# Copyright (C) - 2012 Christian Babeux <christian.babeux@efficios.com>
# Copyright (C) - 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License, version 2 only, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16
# Shared configuration for the health-check tests.
#
# NOTE(review): CURDIR is not assigned in this file; it is presumably set by
# the script that sources this one -- confirm against the callers.
TESTDIR="${CURDIR}/../../.."

# Event and channel names enabled in the tracing session when a consumer
# daemon is part of the test.
UST_EVENT_NAME="tp:tptest"
KERNEL_EVENT_NAME="sched_switch"
CHANNEL_NAME="testchan"

# Name of the health check program, looked up in CURDIR.
HEALTH_CHECK_BIN="health_check"

# Number of TAP tests announced to plan_tests.
NUM_TESTS=99

# Seconds to wait between the two health checks of a scenario.
SLEEP_TIME=30

# Quoted so that a checkout path containing whitespace still resolves.
# shellcheck source=/dev/null
source "${TESTDIR}/utils/utils.sh"
26
# Validate the captured health check output of one scenario and emit exactly
# one TAP result: "Validation OK" or "Validation failure".
#
# The scenario is valid when the expected error message appears in the
# captured stdout, or -- only when a relay daemon is part of the test -- when
# the captured stderr reports that relayd health could not be queried.
# On failure, both captured files are dumped through diag.
#
# Arguments:
#   $1 - expected error message, matched as a literal string
#   $2 - non-zero when a relay daemon is part of the test
# Globals read:
#   STDOUT_PATH, STDERR_PATH - files holding the health check output
# Calls:
#   pass, fail, diag (not defined in this file)
function report_errors
{
  local test_thread_error_string="$1"
  local test_relayd="$2"
  local err_no_relayd_match="Error querying relayd health"
  local matched=0
  local line

  # Check for health errors
  # Include inability to contact relayd health as an expected
  # error, since this can happen whenever the relayd shuts down due
  # to an error in any thread.
  #
  # -F: the messages end with "." and must not be interpreted as regular
  # expressions; "--" protects against a pattern starting with "-".
  if grep -q -F -- "${test_thread_error_string}" "${STDOUT_PATH}"; then
    matched=1
  fi
  if [[ "${test_relayd:-0}" -ne 0 ]] &&
    grep -q -F -- "${err_no_relayd_match}" "${STDERR_PATH}"; then
    matched=1
  fi

  if [[ "${matched}" -eq 1 ]]; then
    pass "Validation OK"
    return
  fi

  fail "Validation failure"
  diag "Health returned:"

  # IFS= and -r keep leading whitespace and backslashes intact; the
  # "|| [[ -n ... ]]" part also reports a last line lacking a newline.
  diag "stdout:"
  while IFS= read -r line || [[ -n "${line}" ]]; do
    diag "${line}"
  done < "${STDOUT_PATH}"

  diag "stderr:"
  while IFS= read -r line || [[ -n "${line}" ]]; do
    diag "${line}"
  done < "${STDERR_PATH}"
}
61
# Run one health-check scenario against a single thread testpoint.
#
# Sequence: export the testpoint environment, start the session daemon,
# optionally set up a tracing session (consumer daemon) and a relay daemon,
# run the health check once, wait SLEEP_TIME seconds, run it again while
# capturing its output, validate the output with report_errors, then stop
# the daemons and unset the exported variables.
#
# Arguments:
#   $1 - testpoint suffix; "<thread name>_<suffix>" is exported as 1
#   $2 - thread testpoint name
#   $3 - error message expected from the health check
#   $4 - 1 when validating this thread requires root
#   $5 - 1 to create and start a tracing session
#   $6 - 1 to start a relay daemon and query its health
# Globals read:
#   CURDIR, SESSIOND_PRELOAD, HEALTH_CHECK_BIN, HEALTH_PATH, TRACE_PATH,
#   SESSION_NAME, UST_EVENT_NAME, KERNEL_EVENT_NAME, CHANNEL_NAME,
#   SLEEP_TIME, STDOUT_PATH, STDERR_PATH, KILL_SIGNAL, isroot
# Globals written:
#   RELAYD_ARGS
function test_health
{
  local test_suffix="$1"
  local test_thread_name="$2"
  local test_thread_error_string="$3"
  local test_needs_root="$4"
  local test_consumerd="$5"
  local test_relayd="$6"

  diag "Test health problem detection with ${test_thread_name}"

  # Set the socket timeout to 5 so the health check detection
  # happens within 25 s
  export LTTNG_NETWORK_SOCKET_TIMEOUT=5
  export LTTNG_RELAYD_HEALTH="${HEALTH_PATH}/test-health"

  # Activate testpoints
  export LTTNG_TESTPOINT_ENABLE=1

  # Activate specific thread test
  export "${test_thread_name}_${test_suffix}=1"

  # Spawn sessiond with preload healthexit lib
  export LD_PRELOAD="${CURDIR}/${SESSIOND_PRELOAD}"

  diag "Start session daemon"
  start_lttng_sessiond

  if [[ "${test_consumerd}" -eq 1 ]]; then
    create_lttng_session_no_output "${SESSION_NAME}"

    diag "With UST consumer daemons"
    enable_ust_lttng_event_ok "${SESSION_NAME}" "${UST_EVENT_NAME}" "${CHANNEL_NAME}"

    # The kernel event is enabled only when skip reports that nothing
    # was skipped (non-zero return).
    if ! skip "${isroot}" "Root access is needed. Skipping kernel consumer health check test." "1"; then
      diag "With kernel consumer daemon"
      lttng_enable_kernel_event "${SESSION_NAME}" "${KERNEL_EVENT_NAME}" "${CHANNEL_NAME}"
    fi
    start_lttng_tracing_ok "${SESSION_NAME}"
  fi

  if [[ "${test_relayd}" -eq 1 ]]; then
    diag "With relay daemon"
    RELAYD_ARGS="--relayd-path=${LTTNG_RELAYD_HEALTH}"

    start_lttng_relayd "-o ${TRACE_PATH}"
  else
    RELAYD_ARGS=
  fi

  # Check health status, not caring about result.
  # ${RELAYD_ARGS:+"..."} yields no argument at all when RELAYD_ARGS is
  # empty and exactly one argument otherwise, even if the path has spaces.
  "${CURDIR}/${HEALTH_CHECK_BIN}" ${RELAYD_ARGS:+"${RELAYD_ARGS}"} \
    > /dev/null

  # Wait
  diag "Check after running for ${SLEEP_TIME} seconds"
  sleep "${SLEEP_TIME}"

  # Check health status
  "${CURDIR}/${HEALTH_CHECK_BIN}" ${RELAYD_ARGS:+"${RELAYD_ARGS}"} \
    > "${STDOUT_PATH}" 2> "${STDERR_PATH}"

  if [[ "${test_needs_root}" -eq 1 ]]; then
    if ! skip "${isroot}" "Root access needed for test \"${test_thread_name}\"." "1"; then
      report_errors "${test_thread_error_string}" "${test_relayd}"
    fi
  else
    report_errors "${test_thread_error_string}" "${test_relayd}"
  fi

  # KILL_SIGNAL is left unquoted on purpose below: when it is unset or
  # empty, no argument at all is passed to the stop helpers.
  if [[ "${test_relayd}" -eq 1 ]]; then
    # We may fail to stop relayd here, and this is OK, since
    # it may have been killed voluntarily by testpoint.
    # shellcheck disable=SC2086
    stop_lttng_relayd_notap $KILL_SIGNAL
  fi

  # Only stop the consumer daemon when this scenario started one.
  if [[ "${test_consumerd}" -eq 1 ]]; then
    # shellcheck disable=SC2086
    stop_lttng_consumerd $KILL_SIGNAL
  fi
  # shellcheck disable=SC2086
  stop_lttng_sessiond $KILL_SIGNAL

  unset LTTNG_TESTPOINT_ENABLE
  unset "${test_thread_name}_${test_suffix}"
  unset LD_PRELOAD
  unset LTTNG_NETWORK_SOCKET_TIMEOUT
  unset LTTNG_RELAYD_HEALTH
}
152
# Announce the TAP plan and print the banner for this test.
# NOTE(review): TEST_DESC, CURDIR and SESSIOND_PRELOAD are not assigned in
# this file; presumably the sourcing script provides them -- confirm.
plan_tests "${NUM_TESTS}"

print_test_banner "${TEST_DESC}"

# The scenarios rely on the preload object; record whether it exists.
if [[ -f "${CURDIR}/${SESSIOND_PRELOAD}" ]]; then
  foundobj=1
else
  foundobj=0
fi

# When skip reports that the tests were skipped (zero return), stop here
# with a success status.
if skip "${foundobj}" "No shared object generated. Skipping all tests." "${NUM_TESTS}"; then
  exit 0
fi
164
# Thread testpoint names, one per scenario. The other per-scenario arrays of
# this file are read with the same index as this one.
THREAD=(
  # Session daemon threads
  "LTTNG_SESSIOND_THREAD_MANAGE_CLIENTS"
  "LTTNG_SESSIOND_THREAD_MANAGE_APPS"
  "LTTNG_SESSIOND_THREAD_REG_APPS"
  "LTTNG_SESSIOND_THREAD_HT_CLEANUP"
  "LTTNG_SESSIOND_THREAD_APP_MANAGE_NOTIFY"
  "LTTNG_SESSIOND_THREAD_APP_REG_DISPATCH"
  "LTTNG_SESSIOND_THREAD_MANAGE_KERNEL"

  # Consumer daemon threads
  "LTTNG_CONSUMERD_THREAD_CHANNEL"
  "LTTNG_CONSUMERD_THREAD_METADATA"
  "LTTNG_CONSUMERD_THREAD_METADATA_TIMER"

  # Relay daemon threads
  "LTTNG_RELAYD_THREAD_DISPATCHER"
  "LTTNG_RELAYD_THREAD_WORKER"
  "LTTNG_RELAYD_THREAD_LISTENER"
  "LTTNG_RELAYD_THREAD_LIVE_DISPATCHER"
  "LTTNG_RELAYD_THREAD_LIVE_WORKER"
  "LTTNG_RELAYD_THREAD_LIVE_LISTENER"
)
184
# Error message expected from the health check for each scenario; entry i
# belongs to THREAD[i]. Single quotes keep the embedded double quotes
# literal without backslash escapes.
ERROR_STRING=(
  # Session daemon threads
  'Thread "Session daemon command" is not responding in component "sessiond".'
  'Thread "Session daemon application manager" is not responding in component "sessiond".'
  'Thread "Session daemon application registration" is not responding in component "sessiond".'
  'Thread "Session daemon hash table cleanup" is not responding in component "sessiond".'
  'Thread "Session daemon application notification manager" is not responding in component "sessiond".'
  'Thread "Session daemon application registration dispatcher" is not responding in component "sessiond".'
  'Thread "Session daemon kernel" is not responding in component "sessiond".'

  # Consumer daemon threads (no component suffix in the expected message)
  'Thread "Consumer daemon channel" is not responding'
  'Thread "Consumer daemon metadata" is not responding'
  'Thread "Consumer daemon metadata timer" is not responding'

  # Relay daemon threads
  'Thread "Relay daemon dispatcher" is not responding in component "relayd".'
  'Thread "Relay daemon worker" is not responding in component "relayd".'
  'Thread "Relay daemon listener" is not responding in component "relayd".'
  'Thread "Relay daemon live dispatcher" is not responding in component "relayd".'
  'Thread "Relay daemon live worker" is not responding in component "relayd".'
  'Thread "Relay daemon live listener" is not responding in component "relayd".'
)
205
# TODO
# "LTTNG_SESSIOND_THREAD_MANAGE_CONSUMER"
# "Thread \"Session daemon manage consumer\" is not responding in component \"sessiond\"."

# TODO: test kernel consumerd specifically in addition to UST consumerd

# TODO: need refactoring of consumerd teardown
# "LTTNG_CONSUMERD_THREAD_SESSIOND"
# "Thread \"Consumer daemon session daemon command manager\" is not responding"

# TODO: this thread is responsible for closing a file descriptor that
# triggers teardown of the metadata thread. We should revisit teardown of
# consumerd.
# "LTTNG_CONSUMERD_THREAD_DATA"
# "Thread \"Consumer daemon data\" is not responding"
221
# Per-scenario flag, indexed like THREAD: 1 when validating the scenario
# requires root. Only index 6 (LTTNG_SESSIOND_THREAD_MANAGE_KERNEL) is set.
NEEDS_ROOT=(
  0 0 0 0 0 0 1 # session daemon threads (indexes 0-6)
  0 0 0         # consumer daemon threads (indexes 7-9)
  0 0 0 0 0 0   # relay daemon threads (indexes 10-15)
)
242
# Per-scenario flag, indexed like THREAD: 1 when the scenario creates and
# starts a tracing session (and later stops the consumer daemon). Set for
# the consumer daemon and relay daemon scenarios.
TEST_CONSUMERD=(
  0 0 0 0 0 0 0 # session daemon threads (indexes 0-6)
  1 1 1         # consumer daemon threads (indexes 7-9)
  1 1 1 1 1 1   # relay daemon threads (indexes 10-15)
)
263
# Per-scenario flag, indexed like THREAD: 1 when the scenario starts a relay
# daemon and queries its health. Set only for the relay daemon scenarios.
TEST_RELAYD=(
  0 0 0 0 0 0 0 # session daemon threads (indexes 0-6)
  0 0 0         # consumer daemon threads (indexes 7-9)
  1 1 1 1 1 1   # relay daemon threads (indexes 10-15)
)
284
# Scratch locations: captured health check stdout/stderr, the relay daemon
# output directory and the directory holding the relayd health socket path.
STDOUT_PATH=$(mktemp)
STDERR_PATH=$(mktemp)
TRACE_PATH=$(mktemp -d)
HEALTH_PATH=$(mktemp -d)

# isroot is 1 when running as uid 0, 0 otherwise.
if [[ "$(id -u)" == "0" ]]; then
  isroot=1
else
  isroot=0
fi

# Run one scenario per THREAD entry; the per-scenario arrays are all read
# with the same index.
THREAD_COUNT=${#THREAD[@]}
for (( i = 0; i < THREAD_COUNT; i++ )); do
  test_health "${TEST_SUFFIX}" \
    "${THREAD[$i]}" \
    "${ERROR_STRING[$i]}" \
    "${NEEDS_ROOT[$i]}" \
    "${TEST_CONSUMERD[$i]}" \
    "${TEST_RELAYD[$i]}"
done

# Remove the scratch locations. Quoted, with "--", so unusual TMPDIR paths
# cannot be word-split or parsed as options.
rm -rf -- "${HEALTH_PATH}"
rm -rf -- "${TRACE_PATH}"
rm -f -- "${STDOUT_PATH}"
rm -f -- "${STDERR_PATH}"
This page took 0.035056 seconds and 4 git commands to generate.