Fix: relayd: connection abruptly closed on viewer stream creation failure
[lttng-tools.git] / src / bin / lttng-relayd / live.c
index 7d6dc1bc20a03a5af6cafe1e5ef170be1b7f5cb8..f1e440640aa465c9389f8061174e10657fa32a47 100644 (file)
@@ -284,13 +284,6 @@ static int make_viewer_streams(struct relay_session *relay_session,
        assert(relay_session);
        ASSERT_LOCKED(relay_session->lock);
 
-       if (!viewer_session->current_trace_chunk) {
-               ERR("Internal error: viewer session associated with session \"%s\" has a NULL trace chunk",
-                               relay_session->session_name);
-               ret = -1;
-               goto error;
-       }
-
        if (relay_session->connection_closed) {
                *closed = true;
        }
@@ -361,7 +354,7 @@ static int make_viewer_streams(struct relay_session *relay_session,
                        viewer_stream = viewer_stream_get_by_id(
                                        relay_stream->stream_handle);
                        if (!viewer_stream) {
-                               struct lttng_trace_chunk *viewer_stream_trace_chunk;
+                               struct lttng_trace_chunk *viewer_stream_trace_chunk = NULL;
 
                                /*
                                 * Save that we sent the metadata stream to the
@@ -381,7 +374,7 @@ static int make_viewer_streams(struct relay_session *relay_session,
                                 * chunk can be used safely.
                                 */
                                if ((relay_stream->ongoing_rotation.is_set ||
-                                                   relay_session->ongoing_rotation) &&
+                                               session_has_ongoing_rotation(relay_session)) &&
                                                relay_stream->trace_chunk) {
                                        viewer_stream_trace_chunk = lttng_trace_chunk_copy(
                                                        relay_stream->trace_chunk);
@@ -391,12 +384,42 @@ static int make_viewer_streams(struct relay_session *relay_session,
                                                goto error_unlock;
                                        }
                                } else {
-                                       const bool reference_acquired = lttng_trace_chunk_get(
-                                                       viewer_session->current_trace_chunk);
+                                       /*
+                                        * Transition the viewer session into the newest trace chunk available.
+                                        */
+                                       if (!lttng_trace_chunk_ids_equal(viewer_session->current_trace_chunk,
+                                                       relay_stream->trace_chunk)) {
+
+                                               ret = viewer_session_set_trace_chunk_copy(
+                                                               viewer_session,
+                                                               relay_stream->trace_chunk);
+                                               if (ret) {
+                                                       ret = -1;
+                                                       ctf_trace_put(ctf_trace);
+                                                       goto error_unlock;
+                                               }
+                                       }
 
-                                       assert(reference_acquired);
-                                       viewer_stream_trace_chunk =
-                                                       viewer_session->current_trace_chunk;
+                                       if (relay_stream->trace_chunk) {
+                                               /*
+                                                * If the corresponding relay
+                                                * stream's trace chunk is set,
+                                                * the viewer stream will be
+                                                * created under it.
+                                                *
+                                                * Note that a relay stream can
+                                                * have a NULL output trace
+                                                * chunk (for instance, after a
+                                                * clear against a stopped
+                                                * session).
+                                                */
+                                               const bool reference_acquired = lttng_trace_chunk_get(
+                                                               viewer_session->current_trace_chunk);
+
+                                               assert(reference_acquired);
+                                               viewer_stream_trace_chunk =
+                                                               viewer_session->current_trace_chunk;
+                                       }
                                }
 
                                viewer_stream = viewer_stream_create(
@@ -460,7 +483,7 @@ static int make_viewer_streams(struct relay_session *relay_session,
 
 error_unlock:
        rcu_read_unlock();
-error:
+
        if (relay_stream) {
                pthread_mutex_unlock(&relay_stream->lock);
                stream_put(relay_stream);
@@ -1018,14 +1041,6 @@ int viewer_list_sessions(struct relay_connection *conn)
                        /* Skip closed session */
                        goto next_session;
                }
-               if (!session->current_trace_chunk) {
-                       /*
-                        * Skip un-attachable session. It is either
-                        * being destroyed or has not had a trace
-                        * chunk created against it yet.
-                        */
-                       goto next_session;
-               }
 
                if (count >= buf_count) {
                        struct lttng_viewer_session *newbuf;
@@ -1140,13 +1155,37 @@ int viewer_get_new_streams(struct relay_connection *conn)
                goto send_reply;
        }
 
+       /*
+        * For any new stream, create it with LTTNG_VIEWER_SEEK_BEGINNING since
+        * that at this point the client is already attached to the session.Aany
+        * initial stream will have been created with the seek type at attach
+        * time (for now most readers use the LTTNG_VIEWER_SEEK_LAST on attach).
+        * Otherwise any event happening in a new stream between the attach and
+        * a call to viewer_get_new_streams will be "lost" (never received) from
+        * the viewer's point of view.
+        */
        pthread_mutex_lock(&session->lock);
+       /*
+        * If a session rotation is ongoing, do not attempt to open any
+        * stream, because the chunk can be in an intermediate state
+        * due to directory renaming.
+        */
+       if (session_has_ongoing_rotation(session)) {
+               DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
+               response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW);
+               goto send_reply_unlock;
+       }
        ret = make_viewer_streams(session,
                        conn->viewer_session,
-                       LTTNG_VIEWER_SEEK_LAST, &nb_total, &nb_unsent,
+                       LTTNG_VIEWER_SEEK_BEGINNING, &nb_total, &nb_unsent,
                        &nb_created, &closed);
        if (ret < 0) {
-               goto error_unlock_session;
+               /*
+                * This is caused by an internal error; propagate the negative
+                * 'ret' to close the connection.
+                */
+               response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_ERR);
+               goto send_reply;
        }
        send_streams = 1;
        response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_OK);
@@ -1201,10 +1240,6 @@ end_put_session:
        }
 error:
        return ret;
-error_unlock_session:
-       pthread_mutex_unlock(&session->lock);
-       session_put(session);
-       return ret;
 }
 
 /*
@@ -1254,15 +1289,6 @@ int viewer_attach_session(struct relay_connection *conn)
        DBG("Attach session ID %" PRIu64 " received", session_id);
 
        pthread_mutex_lock(&session->lock);
-       if (!session->current_trace_chunk) {
-               /*
-                * Session is either being destroyed or it never had a trace
-                * chunk created against it.
-                */
-               DBG("Session requested by live client has no current trace chunk, returning unknown session");
-               response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK);
-               goto send_reply;
-       }
        if (session->live_timer == 0) {
                DBG("Not live session");
                response.status = htobe32(LTTNG_VIEWER_ATTACH_NOT_LIVE);
@@ -1290,6 +1316,17 @@ int viewer_attach_session(struct relay_connection *conn)
                goto send_reply;
        }
 
+       /*
+        * If a session rotation is ongoing, do not attempt to open any
+        * stream, because the chunk can be in an intermediate state
+        * due to directory renaming.
+        */
+       if (session_has_ongoing_rotation(session)) {
+               DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
+               send_streams = 0;
+               goto send_reply;
+       }
+
        ret = make_viewer_streams(session,
                        conn->viewer_session, seek_type,
                        &nb_streams, NULL, NULL, &closed);
@@ -1371,10 +1408,12 @@ static int try_open_index(struct relay_viewer_stream *vstream,
        /*
         * First time, we open the index file and at least one index is ready.
         */
-       if (rstream->index_received_seqcount == 0) {
+       if (rstream->index_received_seqcount == 0 ||
+                       !vstream->stream_file.trace_chunk) {
                ret = -ENOENT;
                goto end;
        }
+
        chunk_status = lttng_index_file_create_from_trace_chunk_read_only(
                        vstream->stream_file.trace_chunk, rstream->path_name,
                        rstream->channel_name, rstream->tracefile_size,
@@ -1528,6 +1567,24 @@ index_ready:
        return 1;
 }
 
+static
+void viewer_stream_rotate_to_trace_chunk(struct relay_viewer_stream *vstream,
+                struct lttng_trace_chunk *new_trace_chunk)
+{
+       lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
+
+       if (new_trace_chunk) {
+               const bool acquired_reference = lttng_trace_chunk_get(
+                               new_trace_chunk);
+
+               assert(acquired_reference);
+       }
+
+       vstream->stream_file.trace_chunk = new_trace_chunk;
+       viewer_stream_sync_tracefile_array_tail(vstream);
+       viewer_stream_close_files(vstream);
+}
+
 /*
  * Send the next index for a stream.
  *
@@ -1574,6 +1631,13 @@ int viewer_get_next_index(struct relay_connection *conn)
        metadata_viewer_stream =
                        ctf_trace_get_viewer_metadata_stream(ctf_trace);
 
+       /*
+        * Hold the session lock to protect against concurrent changes
+        * to the chunk files (e.g. rename done by clear), which are
+        * protected by the session ongoing rotation state. Those are
+        * synchronized with the session lock.
+        */
+       pthread_mutex_lock(&rstream->trace->session->lock);
        pthread_mutex_lock(&rstream->lock);
 
        /*
@@ -1590,13 +1654,16 @@ int viewer_get_next_index(struct relay_connection *conn)
                goto send_reply;
        }
 
-       if (rstream->trace->session->ongoing_rotation) {
+       if (session_has_ongoing_rotation(rstream->trace->session)) {
                /* Rotation is ongoing, try again later. */
                viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
                goto send_reply;
        }
 
-       if (rstream->trace_chunk && !lttng_trace_chunk_ids_equal(
+       /*
+        * Transition the viewer session into the newest trace chunk available.
+        */
+       if (!lttng_trace_chunk_ids_equal(
                        conn->viewer_session->current_trace_chunk,
                        rstream->trace_chunk)) {
                DBG("Relay stream and viewer chunk ids differ");
@@ -1609,21 +1676,28 @@ int viewer_get_next_index(struct relay_connection *conn)
                        goto send_reply;
                }
        }
-       if (conn->viewer_session->current_trace_chunk !=
-                       vstream->stream_file.trace_chunk) {
-               bool acquired_reference;
 
-               DBG("Viewer session and viewer stream chunk differ: "
+       /*
+        * Transition the viewer stream into the latest trace chunk available.
+        *
+        * Note that the stream must _not_ rotate in one precise condition:
+        * the relay stream has rotated to a NULL trace chunk and the viewer
+        * stream is consuming the trace chunk that was active just before
+        * that rotation to NULL.
+        *
+        * This allows clients to consume all the packets of a trace chunk
+        * after a session's destruction.
+        */
+       if (!lttng_trace_chunk_ids_equal(conn->viewer_session->current_trace_chunk, vstream->stream_file.trace_chunk) &&
+                       !(rstream->completed_rotation_count == vstream->last_seen_rotation_count + 1 && !rstream->trace_chunk)) {
+               DBG("Viewer session and viewer stream chunk IDs differ: "
                                "vsession chunk %p vstream chunk %p",
                                conn->viewer_session->current_trace_chunk,
                                vstream->stream_file.trace_chunk);
-               lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
-               acquired_reference = lttng_trace_chunk_get(conn->viewer_session->current_trace_chunk);
-               assert(acquired_reference);
-               vstream->stream_file.trace_chunk =
-                       conn->viewer_session->current_trace_chunk;
-               viewer_stream_sync_tracefile_array_tail(vstream);
-               viewer_stream_close_files(vstream);
+               viewer_stream_rotate_to_trace_chunk(vstream,
+                               conn->viewer_session->current_trace_chunk);
+               vstream->last_seen_rotation_count =
+                               rstream->completed_rotation_count;
        }
 
        ret = check_index_status(vstream, rstream, ctf_trace, &viewer_index);
@@ -1730,6 +1804,7 @@ int viewer_get_next_index(struct relay_connection *conn)
 send_reply:
        if (rstream) {
                pthread_mutex_unlock(&rstream->lock);
+               pthread_mutex_unlock(&rstream->trace->session->lock);
        }
 
        if (metadata_viewer_stream) {
@@ -1771,6 +1846,7 @@ end:
 
 error_put:
        pthread_mutex_unlock(&rstream->lock);
+       pthread_mutex_unlock(&rstream->trace->session->lock);
        if (metadata_viewer_stream) {
                viewer_stream_put(metadata_viewer_stream);
        }
@@ -1854,6 +1930,8 @@ int viewer_get_packet(struct relay_connection *conn)
        goto send_reply;
 
 error:
+       /* No payload to send on error. */
+       reply_size = sizeof(reply_header);
        reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_ERR);
 
 send_reply:
@@ -1957,11 +2035,11 @@ int viewer_get_metadata(struct relay_connection *conn)
                 * an error.
                 */
                if (vstream->metadata_sent > 0) {
-                       vstream->stream->no_new_metadata_notified = true;
-                       if (vstream->stream->closed) {
+                       if (vstream->stream->closed && vstream->stream->no_new_metadata_notified) {
                                /* Release ownership for the viewer metadata stream. */
                                viewer_stream_put(vstream);
                        }
+                       vstream->stream->no_new_metadata_notified = true;
                }
                goto send_reply;
        }
@@ -1982,8 +2060,9 @@ int viewer_get_metadata(struct relay_connection *conn)
                }
        }
 
-       if (conn->viewer_session->current_trace_chunk !=
-                       vstream->stream_file.trace_chunk) {
+       if (conn->viewer_session->current_trace_chunk &&
+                       !lttng_trace_chunk_ids_equal(conn->viewer_session->current_trace_chunk,
+                                       vstream->stream_file.trace_chunk)) {
                bool acquired_reference;
 
                DBG("Viewer session and viewer stream chunk differ: "
@@ -2000,11 +2079,16 @@ int viewer_get_metadata(struct relay_connection *conn)
 
        len = vstream->stream->metadata_received - vstream->metadata_sent;
 
-       /*
-        * Either this is the first time the metadata file is read, or a
-        * rotation of the corresponding relay stream has occured.
-        */
-       if (!vstream->stream_file.handle && len > 0) {
+       if (!vstream->stream_file.trace_chunk) {
+               reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
+               len = 0;
+               goto send_reply;
+       } else if (vstream->stream_file.trace_chunk &&
+                       !vstream->stream_file.handle && len > 0) {
+               /*
+                * Either this is the first time the metadata file is read, or a
+                * rotation of the corresponding relay stream has occurred.
+                */
                struct fs_handle *fs_handle;
                char file_path[LTTNG_PATH_MAX];
                enum lttng_trace_chunk_status status;
This page took 0.027882 seconds and 4 git commands to generate.