Fix: perform relayd socket pair cleanup on control socket error
[lttng-tools.git] / src / common / consumer / consumer-stream.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2013 - David Goulet <dgoulet@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License, version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <inttypes.h>
23 #include <sys/mman.h>
24 #include <unistd.h>
25
26 #include <common/common.h>
27 #include <common/index/index.h>
28 #include <common/kernel-consumer/kernel-consumer.h>
29 #include <common/relayd/relayd.h>
30 #include <common/ust-consumer/ust-consumer.h>
31 #include <common/utils.h>
32
33 #include "consumer-stream.h"
34
35 /*
36 * RCU call to free stream. MUST only be used with call_rcu().
37 */
38 static void free_stream_rcu(struct rcu_head *head)
39 {
40 struct lttng_ht_node_u64 *node =
41 caa_container_of(head, struct lttng_ht_node_u64, head);
42 struct lttng_consumer_stream *stream =
43 caa_container_of(node, struct lttng_consumer_stream, node);
44
45 pthread_mutex_destroy(&stream->lock);
46 free(stream);
47 }
48
49 /*
50 * Close stream on the relayd side. This call can destroy a relayd if the
51 * conditions are met.
52 *
53 * A RCU read side lock MUST be acquired if the relayd object was looked up in
54 * a hash table before calling this.
55 */
/*
 * Close stream on the relayd side. This call can destroy a relayd if the
 * conditions are met.
 *
 * A RCU read side lock MUST be acquired if the relayd object was looked up in
 * a hash table before calling this.
 */
void consumer_stream_relayd_close(struct lttng_consumer_stream *stream,
		struct consumer_relayd_sock_pair *relayd)
{
	int ret;

	assert(stream);
	assert(relayd);

	/* Drop the reference this stream holds on the relayd, if it took one. */
	if (stream->sent_to_relayd) {
		uatomic_dec(&relayd->refcount);
		assert(uatomic_read(&relayd->refcount) >= 0);
	}

	/* Closing streams requires to lock the control socket. */
	pthread_mutex_lock(&relayd->ctrl_sock_mutex);
	ret = relayd_send_close_stream(&relayd->control_sock,
			stream->relayd_stream_id,
			stream->next_net_seq_num - 1);
	pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
	if (ret < 0) {
		/*
		 * Communication error with the relay daemon: clean up the
		 * socket pair so it is not reused in a broken state.
		 */
		ERR("Relayd send close stream failed. Cleaning up relayd %" PRIu64 ".", relayd->net_seq_idx);
		lttng_consumer_cleanup_relayd(relayd);
	}

	/* Both conditions are met, we destroy the relayd. */
	if (uatomic_read(&relayd->refcount) == 0 &&
			uatomic_read(&relayd->destroy_flag)) {
		consumer_destroy_relayd(relayd);
	}
	/* Mark the stream as no longer associated with a network relayd. */
	stream->net_seq_idx = (uint64_t) -1ULL;
	stream->sent_to_relayd = 0;
}
88
89 /*
90 * Close stream's file descriptors and, if needed, close stream also on the
91 * relayd side.
92 *
93 * The consumer data lock MUST be acquired.
94 * The stream lock MUST be acquired.
95 */
/*
 * Close stream's file descriptors and, if needed, close stream also on the
 * relayd side.
 *
 * The consumer data lock MUST be acquired.
 * The stream lock MUST be acquired.
 */
void consumer_stream_close(struct lttng_consumer_stream *stream)
{
	int ret;
	struct consumer_relayd_sock_pair *relayd;

	assert(stream);

	/* Domain-specific teardown of the stream's local resources. */
	switch (consumer_data.type) {
	case LTTNG_CONSUMER_KERNEL:
		/* Unmap the read-side buffer mapping, if one was set up. */
		if (stream->mmap_base != NULL) {
			ret = munmap(stream->mmap_base, stream->mmap_len);
			if (ret != 0) {
				PERROR("munmap");
			}
		}

		if (stream->wait_fd >= 0) {
			ret = close(stream->wait_fd);
			if (ret) {
				PERROR("close");
			}
			stream->wait_fd = -1;
		}
		/* Splice output owns a dedicated pipe pair; release both ends. */
		if (stream->chan->output == CONSUMER_CHANNEL_SPLICE) {
			utils_close_pipe(stream->splice_pipe);
		}
		break;
	case LTTNG_CONSUMER32_UST:
	case LTTNG_CONSUMER64_UST:
	{
		/*
		 * Special case for the metadata since the wait fd is an internal pipe
		 * polled in the metadata thread.
		 */
		if (stream->metadata_flag && stream->chan->monitor) {
			int rpipe = stream->ust_metadata_poll_pipe[0];

			/*
			 * This will stop the channel timer if one and close the write side
			 * of the metadata poll pipe.
			 */
			lttng_ustconsumer_close_metadata(stream->chan);
			if (rpipe >= 0) {
				ret = close(rpipe);
				if (ret < 0) {
					PERROR("closing metadata pipe read side");
				}
				stream->ust_metadata_poll_pipe[0] = -1;
			}
		}
		break;
	}
	default:
		ERR("Unknown consumer_data type");
		assert(0);
	}

	/* Close output fd. Could be a socket or local file at this point. */
	if (stream->out_fd >= 0) {
		ret = close(stream->out_fd);
		if (ret) {
			PERROR("close");
		}
		stream->out_fd = -1;
	}

	/* Drop the reference held on the stream's index file, if any. */
	if (stream->index_file) {
		lttng_index_file_put(stream->index_file);
		stream->index_file = NULL;
	}

	/* Check and cleanup relayd if needed. */
	rcu_read_lock();
	relayd = consumer_find_relayd(stream->net_seq_idx);
	if (relayd != NULL) {
		consumer_stream_relayd_close(stream, relayd);
	}
	rcu_read_unlock();
}
175
176 /*
177 * Delete the stream from all possible hash tables.
178 *
179 * The consumer data lock MUST be acquired.
180 * The stream lock MUST be acquired.
181 */
/*
 * Delete the stream from all possible hash tables.
 *
 * The consumer data lock MUST be acquired.
 * The stream lock MUST be acquired.
 */
void consumer_stream_delete(struct lttng_consumer_stream *stream,
		struct lttng_ht *ht)
{
	int ret;
	struct lttng_ht_iter iter;

	assert(stream);
	/* Should NEVER be called not in monitor mode. */
	assert(stream->chan->monitor);

	rcu_read_lock();

	/* Remove from the caller-provided (thread-specific) hash table, if any. */
	if (ht) {
		iter.iter.node = &stream->node.node;
		ret = lttng_ht_del(ht, &iter);
		assert(!ret);
	}

	/* Delete from stream per channel ID hash table. */
	iter.iter.node = &stream->node_channel_id.node;
	/*
	 * The returned value is of no importance. Even if the node is NOT in the
	 * hash table, we continue since we may have been called by a code path
	 * that did not add the stream to a (all) hash table. Same goes for the
	 * next call ht del call.
	 */
	(void) lttng_ht_del(consumer_data.stream_per_chan_id_ht, &iter);

	/* Delete from the global stream list. */
	iter.iter.node = &stream->node_session_id.node;
	/* See the previous ht del on why we ignore the returned value. */
	(void) lttng_ht_del(consumer_data.stream_list_ht, &iter);

	rcu_read_unlock();

	/* Metadata streams are not accounted in the global stream count. */
	if (!stream->metadata_flag) {
		/* Decrement the stream count of the global consumer data. */
		assert(consumer_data.stream_count > 0);
		consumer_data.stream_count--;
	}
}
223
224 /*
225 * Free the given stream within a RCU call.
226 */
/*
 * Free the given stream within a RCU call.
 *
 * The actual reclaim is deferred to free_stream_rcu() after a grace
 * period, so concurrent RCU readers of the stream remain safe.
 */
void consumer_stream_free(struct lttng_consumer_stream *stream)
{
	assert(stream);

	call_rcu(&stream->node.head, free_stream_rcu);
}
233
234 /*
235 * Destroy the stream's buffers of the tracer.
236 */
237 void consumer_stream_destroy_buffers(struct lttng_consumer_stream *stream)
238 {
239 assert(stream);
240
241 switch (consumer_data.type) {
242 case LTTNG_CONSUMER_KERNEL:
243 break;
244 case LTTNG_CONSUMER32_UST:
245 case LTTNG_CONSUMER64_UST:
246 lttng_ustconsumer_del_stream(stream);
247 break;
248 default:
249 ERR("Unknown consumer_data type");
250 assert(0);
251 }
252 }
253
254 /*
255 * Destroy and close a already created stream.
256 */
/*
 * Destroy and close a already created stream.
 *
 * Helper chaining the tracer buffer teardown and the stream close path
 * (file descriptors and, if applicable, the relayd side).
 */
static void destroy_close_stream(struct lttng_consumer_stream *stream)
{
	assert(stream);

	DBG("Consumer stream destroy monitored key: %" PRIu64, stream->key);

	/* Destroy tracer buffers of the stream. */
	consumer_stream_destroy_buffers(stream);
	/* Close down everything including the relayd if one. */
	consumer_stream_close(stream);
}
268
269 /*
270 * Decrement the stream's channel refcount and if down to 0, return the channel
271 * pointer so it can be destroyed by the caller or NULL if not.
272 */
273 static struct lttng_consumer_channel *unref_channel(
274 struct lttng_consumer_stream *stream)
275 {
276 struct lttng_consumer_channel *free_chan = NULL;
277
278 assert(stream);
279 assert(stream->chan);
280
281 /* Update refcount of channel and see if we need to destroy it. */
282 if (!uatomic_sub_return(&stream->chan->refcount, 1)
283 && !uatomic_read(&stream->chan->nb_init_stream_left)) {
284 free_chan = stream->chan;
285 }
286
287 return free_chan;
288 }
289
290 /*
291 * Destroy a stream completely. This will delete, close and free the stream.
292 * Once return, the stream is NO longer usable. Its channel may get destroyed
293 * if conditions are met for a monitored stream.
294 *
295 * This MUST be called WITHOUT the consumer data and stream lock acquired if
296 * the stream is in _monitor_ mode else it does not matter.
297 */
/*
 * Destroy a stream completely. This will delete, close and free the stream.
 * Once return, the stream is NO longer usable. Its channel may get destroyed
 * if conditions are met for a monitored stream.
 *
 * This MUST be called WITHOUT the consumer data and stream lock acquired if
 * the stream is in _monitor_ mode else it does not matter.
 */
void consumer_stream_destroy(struct lttng_consumer_stream *stream,
		struct lttng_ht *ht)
{
	assert(stream);

	/* Stream is in monitor mode. */
	if (stream->monitor) {
		struct lttng_consumer_channel *free_chan = NULL;

		/*
		 * This means that the stream was successfully removed from the streams
		 * list of the channel and sent to the right thread managing this
		 * stream thus being globally visible.
		 */
		if (stream->globally_visible) {
			/*
			 * Lock ordering: consumer data, then channel, then
			 * stream; released in reverse order below.
			 */
			pthread_mutex_lock(&consumer_data.lock);
			pthread_mutex_lock(&stream->chan->lock);
			pthread_mutex_lock(&stream->lock);
			/* Remove every reference of the stream in the consumer. */
			consumer_stream_delete(stream, ht);

			destroy_close_stream(stream);

			/* Update channel's refcount of the stream. */
			free_chan = unref_channel(stream);

			/* Indicates that the consumer data state MUST be updated after this. */
			consumer_data.need_update = 1;

			pthread_mutex_unlock(&stream->lock);
			pthread_mutex_unlock(&stream->chan->lock);
			pthread_mutex_unlock(&consumer_data.lock);
		} else {
			/*
			 * If the stream is not visible globally, this needs to be done
			 * outside of the consumer data lock section.
			 */
			free_chan = unref_channel(stream);
		}

		/* Last stream reference gone: tear down the channel as well. */
		if (free_chan) {
			consumer_del_channel(free_chan);
		}
	} else {
		/* Non-monitored stream: no hash tables to clean, just close. */
		destroy_close_stream(stream);
	}

	/* Free stream within a RCU call. */
	consumer_stream_free(stream);
}
348
349 /*
350 * Write index of a specific stream either on the relayd or local disk.
351 *
352 * Return 0 on success or else a negative value.
353 */
354 int consumer_stream_write_index(struct lttng_consumer_stream *stream,
355 struct ctf_packet_index *element)
356 {
357 int ret;
358
359 assert(stream);
360 assert(element);
361
362 rcu_read_lock();
363 if (stream->net_seq_idx != (uint64_t) -1ULL) {
364 struct consumer_relayd_sock_pair *relayd;
365 relayd = consumer_find_relayd(stream->net_seq_idx);
366 if (relayd) {
367 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
368 ret = relayd_send_index(&relayd->control_sock, element,
369 stream->relayd_stream_id, stream->next_net_seq_num - 1);
370 if (ret < 0) {
371 /*
372 * Communication error with lttng-relayd,
373 * perform cleanup now
374 */
375 ERR("Relayd send index failed. Cleaning up relayd %" PRIu64 ".", relayd->net_seq_idx);
376 lttng_consumer_cleanup_relayd(relayd);
377 ret = -1;
378 }
379 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
380 } else {
381 ERR("Stream %" PRIu64 " relayd ID %" PRIu64 " unknown. Can't write index.",
382 stream->key, stream->net_seq_idx);
383 ret = -1;
384 }
385 } else {
386 if (lttng_index_file_write(stream->index_file, element)) {
387 ret = -1;
388 } else {
389 ret = 0;
390 }
391 }
392 if (ret < 0) {
393 goto error;
394 }
395
396 error:
397 rcu_read_unlock();
398 return ret;
399 }
400
401 /*
402 * Actually do the metadata sync using the given metadata stream.
403 *
404 * Return 0 on success else a negative value. ENODATA can be returned also
405 * indicating that there is no metadata available for that stream.
406 */
/*
 * Actually do the metadata sync using the given metadata stream.
 *
 * Return 0 on success else a negative value. ENODATA can be returned also
 * indicating that there is no metadata available for that stream.
 */
static int do_sync_metadata(struct lttng_consumer_stream *metadata,
		struct lttng_consumer_local_data *ctx)
{
	int ret;

	assert(metadata);
	assert(metadata->metadata_flag);
	assert(ctx);

	/*
	 * In UST, since we have to write the metadata from the cache packet
	 * by packet, we might need to start this procedure multiple times
	 * until all the metadata from the cache has been extracted.
	 */
	do {
		/*
		 * Steps :
		 * - Lock the metadata stream
		 * - Check if metadata stream node was deleted before locking.
		 *   - if yes, release and return success
		 * - Check if new metadata is ready (flush + snapshot pos)
		 * - If nothing : release and return.
		 * - Lock the metadata_rdv_lock
		 * - Unlock the metadata stream
		 * - cond_wait on metadata_rdv to wait the wakeup from the
		 *   metadata thread
		 * - Unlock the metadata_rdv_lock
		 */
		pthread_mutex_lock(&metadata->lock);

		/*
		 * There is a possibility that we were able to acquire a reference on the
		 * stream from the RCU hash table but between then and now, the node might
		 * have been deleted just before the lock is acquired. Thus, after locking,
		 * we make sure the metadata node has not been deleted which means that the
		 * buffers are closed.
		 *
		 * In that case, there is no need to sync the metadata hence returning a
		 * success return code.
		 */
		ret = cds_lfht_is_node_deleted(&metadata->node.node);
		if (ret) {
			ret = 0;
			goto end_unlock_mutex;
		}

		/* Flush new metadata, if any, using the domain-specific path. */
		switch (ctx->type) {
		case LTTNG_CONSUMER_KERNEL:
			/*
			 * Empty the metadata cache and flush the current stream.
			 */
			ret = lttng_kconsumer_sync_metadata(metadata);
			break;
		case LTTNG_CONSUMER32_UST:
		case LTTNG_CONSUMER64_UST:
			/*
			 * Ask the sessiond if we have new metadata waiting and update the
			 * consumer metadata cache.
			 */
			ret = lttng_ustconsumer_sync_metadata(ctx, metadata);
			break;
		default:
			assert(0);
			ret = -1;
			break;
		}
		/*
		 * Error or no new metadata, we exit here.
		 */
		if (ret <= 0 || ret == ENODATA) {
			goto end_unlock_mutex;
		}

		/*
		 * At this point, new metadata have been flushed, so we wait on the
		 * rendez-vous point for the metadata thread to wake us up when it
		 * finishes consuming the metadata and continue execution.
		 */

		pthread_mutex_lock(&metadata->metadata_rdv_lock);

		/*
		 * Release metadata stream lock so the metadata thread can process it.
		 */
		pthread_mutex_unlock(&metadata->lock);

		/*
		 * Wait on the rendez-vous point. Once woken up, it means the metadata was
		 * consumed and thus synchronization is achieved.
		 */
		pthread_cond_wait(&metadata->metadata_rdv, &metadata->metadata_rdv_lock);
		pthread_mutex_unlock(&metadata->metadata_rdv_lock);
	} while (ret == EAGAIN);

	/* Success */
	return 0;

end_unlock_mutex:
	pthread_mutex_unlock(&metadata->lock);
	return ret;
}
508
509 /*
510 * Synchronize the metadata using a given session ID. A successful acquisition
511 * of a metadata stream will trigger a request to the session daemon and a
512 * snapshot so the metadata thread can consume it.
513 *
514 * This function call is a rendez-vous point between the metadata thread and
515 * the data thread.
516 *
517 * Return 0 on success or else a negative value.
518 */
/*
 * Synchronize the metadata using a given session ID. A successful acquisition
 * of a metadata stream will trigger a request to the session daemon and a
 * snapshot so the metadata thread can consume it.
 *
 * This function call is a rendez-vous point between the metadata thread and
 * the data thread.
 *
 * Return 0 on success or else a negative value.
 */
int consumer_stream_sync_metadata(struct lttng_consumer_local_data *ctx,
		uint64_t session_id)
{
	int ret;
	struct lttng_consumer_stream *stream = NULL;
	struct lttng_ht_iter iter;
	struct lttng_ht *ht;

	assert(ctx);

	/* Ease our life a bit. */
	ht = consumer_data.stream_list_ht;

	rcu_read_lock();

	/* Search the metadata associated with the session id of the given stream. */

	/* Iterate over every stream registered under this session id. */
	cds_lfht_for_each_entry_duplicate(ht->ht,
			ht->hash_fct(&session_id, lttng_ht_seed), ht->match_fct,
			&session_id, &iter.iter, stream, node_session_id.node) {
		/* Only metadata streams are of interest here. */
		if (!stream->metadata_flag) {
			continue;
		}

		ret = do_sync_metadata(stream, ctx);
		if (ret < 0) {
			goto end;
		}
	}

	/*
	 * Force return code to 0 (success) since ret might be ENODATA for instance
	 * which is not an error but rather that we should come back.
	 */
	ret = 0;

end:
	rcu_read_unlock();
	return ret;
}
This page took 0.045377 seconds and 4 git commands to generate.