document lttng_ustconsumer_request_metadata locking constraints
[lttng-tools.git] / src / common / ust-consumer / ust-consumer.c
index 780a601f67e74b70b3632a6d489067d82e1359ef..8926c1c1604e79968fa0364462c2529315b98857 100644 (file)
@@ -113,14 +113,16 @@ error:
  */
 static struct lttng_consumer_channel *allocate_channel(uint64_t session_id,
                const char *pathname, const char *name, uid_t uid, gid_t gid,
-               int relayd_id, uint64_t key, enum lttng_event_output output,
-               uint64_t tracefile_size, uint64_t tracefile_count)
+               uint64_t relayd_id, uint64_t key, enum lttng_event_output output,
+               uint64_t tracefile_size, uint64_t tracefile_count,
+               uint64_t session_id_per_pid)
 {
        assert(pathname);
        assert(name);
 
-       return consumer_allocate_channel(key, session_id, pathname, name, uid, gid,
-                       relayd_id, output, tracefile_size, tracefile_count);
+       return consumer_allocate_channel(key, session_id, pathname, name, uid,
+                       gid, relayd_id, output, tracefile_size,
+                       tracefile_count, session_id_per_pid);
 }
 
 /*
@@ -373,7 +375,7 @@ static int send_sessiond_stream(int sock, struct lttng_consumer_stream *stream)
        assert(stream);
        assert(sock >= 0);
 
-       DBG2("UST consumer sending stream %" PRIu64 " to sessiond", stream->key);
+       DBG("UST consumer sending stream %" PRIu64 " to sessiond", stream->key);
 
        /* Send stream to session daemon. */
        ret = ustctl_send_stream_to_sessiond(sock, stream->ustream);
@@ -394,7 +396,7 @@ static int send_sessiond_channel(int sock,
                struct lttng_consumer_channel *channel,
                struct lttng_consumer_local_data *ctx, int *relayd_error)
 {
-       int ret;
+       int ret, ret_code = LTTNG_OK;
        struct lttng_consumer_stream *stream;
 
        assert(channel);
@@ -403,18 +405,6 @@ static int send_sessiond_channel(int sock,
 
        DBG("UST consumer sending channel %s to sessiond", channel->name);
 
-       /* Send channel to sessiond. */
-       ret = ustctl_send_channel_to_sessiond(sock, channel->uchan);
-       if (ret < 0) {
-               goto error;
-       }
-
-       ret = ustctl_channel_close_wakeup_fd(channel->uchan);
-       if (ret < 0) {
-               goto error;
-       }
-
-       /* The channel was sent successfully to the sessiond at this point. */
        cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
                /* Try to send the stream to the relayd if one is available. */
                ret = send_stream_to_relayd(stream);
@@ -426,9 +416,33 @@ static int send_sessiond_channel(int sock,
                        if (relayd_error) {
                                *relayd_error = 1;
                        }
-                       goto error;
+                       ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL;
                }
+       }
+
+       /* Inform sessiond that we are about to send channel and streams. */
+       ret = consumer_send_status_msg(sock, ret_code);
+       if (ret < 0 || ret_code != LTTNG_OK) {
+               /*
+                * Either the session daemon is not responding or the relayd died so we
+                * stop now.
+                */
+               goto error;
+       }
 
+       /* Send channel to sessiond. */
+       ret = ustctl_send_channel_to_sessiond(sock, channel->uchan);
+       if (ret < 0) {
+               goto error;
+       }
+
+       ret = ustctl_channel_close_wakeup_fd(channel->uchan);
+       if (ret < 0) {
+               goto error;
+       }
+
+       /* The channel was sent successfully to the sessiond at this point. */
+       cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
                /* Send stream to session daemon. */
                ret = send_sessiond_stream(sock, stream);
                if (ret < 0) {
@@ -447,6 +461,9 @@ static int send_sessiond_channel(int sock,
        return 0;
 
 error:
+       if (ret_code != LTTNG_OK) {
+               ret = -1;
+       }
        return ret;
 }
 
@@ -636,6 +653,7 @@ static int close_metadata(uint64_t chan_key)
        }
 
        pthread_mutex_lock(&consumer_data.lock);
+       pthread_mutex_lock(&channel->lock);
 
        if (cds_lfht_is_node_deleted(&channel->node.node)) {
                goto error_unlock;
@@ -656,6 +674,7 @@ static int close_metadata(uint64_t chan_key)
        }
 
 error_unlock:
+       pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 error:
        return ret;
@@ -677,7 +696,7 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key)
        if (!metadata) {
                ERR("UST consumer push metadata %" PRIu64 " not found", key);
                ret = LTTNG_ERR_UST_CHAN_NOT_FOUND;
-               goto error;
+               goto error_find;
        }
 
        /*
@@ -709,9 +728,17 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key)
        /* List MUST be empty after or else it could be reused. */
        assert(cds_list_empty(&metadata->streams.head));
 
-       ret = 0;
+       return 0;
 
 error:
+       /*
+        * Delete metadata channel on error. At this point, the metadata stream can
+        * NOT be monitored by the metadata thread thus having the guarantee that
+        * the stream is still in the local stream list of the channel. This call
+        * will make sure to clean that list.
+        */
+       consumer_del_channel(metadata);
+error_find:
        return ret;
 }
 
@@ -751,7 +778,7 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
         * and ultimately try to get rid of this global consumer data lock.
         */
        pthread_mutex_lock(&consumer_data.lock);
-
+       pthread_mutex_lock(&channel->lock);
        pthread_mutex_lock(&channel->metadata_cache->lock);
        ret = consumer_metadata_cache_write(channel, offset, len, metadata_str);
        if (ret < 0) {
@@ -763,10 +790,12 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
                 * waiting for the metadata cache to be flushed.
                 */
                pthread_mutex_unlock(&channel->metadata_cache->lock);
+               pthread_mutex_unlock(&channel->lock);
                pthread_mutex_unlock(&consumer_data.lock);
                goto end_free;
        }
        pthread_mutex_unlock(&channel->metadata_cache->lock);
+       pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 
        while (consumer_metadata_cache_flushed(channel, offset + len)) {
@@ -901,7 +930,8 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                                msg.u.ask_channel.relayd_id, msg.u.ask_channel.key,
                                (enum lttng_event_output) msg.u.ask_channel.output,
                                msg.u.ask_channel.tracefile_size,
-                               msg.u.ask_channel.tracefile_count);
+                               msg.u.ask_channel.tracefile_count,
+                               msg.u.ask_channel.session_id_per_pid);
                if (!channel) {
                        goto end_channel_error;
                }
@@ -1007,13 +1037,6 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                        goto end_msg_sessiond;
                }
 
-               /* Inform sessiond that we are about to send channel and streams. */
-               ret = consumer_send_status_msg(sock, LTTNG_OK);
-               if (ret < 0) {
-                       /* Somehow, the session daemon is not responding anymore. */
-                       goto error_fatal;
-               }
-
                /* Send everything to sessiond. */
                ret = send_sessiond_channel(sock, channel, ctx, &relayd_err);
                if (ret < 0) {
@@ -1021,10 +1044,10 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                                /*
                                 * We were unable to send to the relayd the stream so avoid
                                 * sending back a fatal error to the thread since this is OK
-                                * and the consumer can continue its work.
+                                * and the consumer can continue its work. The above call
+                                * has sent the error status message to the sessiond.
                                 */
-                               ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL;
-                               goto end_msg_sessiond;
+                               goto end_nosignal;
                        }
                        /*
                         * The communicaton was broken hence there is a bad state between
@@ -1278,7 +1301,7 @@ int lttng_ustconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
        assert(stream->ustream);
        assert(ctx);
 
-       DBG2("In UST read_subbuffer (wait_fd: %d, name: %s)", stream->wait_fd,
+       DBG("In UST read_subbuffer (wait_fd: %d, name: %s)", stream->wait_fd,
                        stream->name);
 
        /* Ease our life for what's next. */
@@ -1394,6 +1417,11 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream)
 
        DBG("UST consumer checking data pending");
 
+       if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
+               ret = 0;
+               goto end;
+       }
+
        ret = ustctl_get_next_subbuf(stream->ustream);
        if (ret == 0) {
                /* There is still data so let's put back this subbuffer. */
@@ -1458,6 +1486,12 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream)
        }
 }
 
+/*
+ * Please refer to consumer-timer.c before adding any lock within this
+ * function or any of its callees. Timers have a very strict locking
+ * semantic with respect to teardown. Failure to respect this semantic
+ * introduces deadlocks.
+ */
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
                struct lttng_consumer_channel *channel)
 {
@@ -1484,10 +1518,13 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
        }
 
        request.session_id = channel->session_id;
+       request.session_id_per_pid = channel->session_id_per_pid;
        request.uid = channel->uid;
        request.key = channel->key;
-       DBG("Sending metadata request to sessiond, session %" PRIu64,
-                       channel->session_id);
+       DBG("Sending metadata request to sessiond, session id %" PRIu64
+                       ", per-pid %" PRIu64,
+                       channel->session_id,
+                       channel->session_id_per_pid);
 
        ret = lttcomm_send_unix_sock(ctx->consumer_metadata_socket, &request,
                        sizeof(request));
@@ -1544,7 +1581,13 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
 
        ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket,
                        key, offset, len, channel);
-       (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code);
+       if (ret_code >= 0) {
+               /*
+                * Only send the status msg if the sessiond is alive meaning a positive
+                * ret code.
+                */
+               (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code);
+       }
        ret = 0;
 
 end:
This page took 0.026378 seconds and 4 git commands to generate.