From f75135f45f3cfead535cff72103217b029d0d5da Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Wed, 2 Mar 2022 12:37:39 -0500 Subject: [PATCH] Fix: relayd: live client fails on clear of multi-domain session MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Observed issue ============== Two test cases of the clear/test_ust test suite occasionally fail in the integration jobs testing cross-bitness (32/64) LTTng deployments. Babeltrace fails with the following error when a clear occurs while a live client consumes a trace: 02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE/VIEWER lttng_live_recv@viewer-connection.c:198 [lttng-live] Remote side has closed connection 02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE/VIEWER lttng_live_session_get_new_streams@viewer-connection.c:1706 [lttng-live] Error receiving get new streams reply 02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE lttng_live_msg_iter_next@lttng-live.c:1665 [lttng-live] Error preparing the next batch of messages: live-iter-status=LTTNG_LIVE_ITERATOR_STATUS_ERROR 02-28 16:55:03.262 32362 32362 W LIB/MSG-ITER bt_message_iterator_next@iterator.c:864 Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb1170, iter-upstream-comp-name="lttng-live", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=SOURCE, iter-upstream-comp-class-name="lttng-live", iter-upstream-comp-class-partial-descr="Connect to an LTTng relay daemon", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR 02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER muxer_upstream_msg_iter_next@muxer.c:454 [muxer] Upstream iterator's next method returned an error: status=ERROR 02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER validate_muxer_upstream_msg_iters@muxer.c:991 [muxer] Cannot validate muxer's upstream message iterator wrapper: muxer-msg-iter-addr=0x55eab7eb1120, muxer-upstream-msg-iter-wrap-addr=0x55eab7eb3a70 02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER muxer_msg_iter_next@muxer.c:1415 [muxer] Cannot get next message: comp-addr=0x55eab7eb0470, muxer-comp-addr=0x55eab7eb0510, muxer-msg-iter-addr=0x55eab7eb1120, msg-iter-addr=0x55eab7eb0fb0, status=ERROR 02-28 16:55:03.262 32362 32362 W LIB/MSG-ITER bt_message_iterator_next@iterator.c:864 Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb0fb0, iter-upstream-comp-name="muxer", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=FILTER, iter-upstream-comp-class-name="muxer", iter-upstream-comp-class-partial-descr="Sort messages from multiple inpu", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR 02-28 16:55:03.262 32362 32362 W LIB/GRAPH consume_graph_sink@graph.c:473 Component's "consume" method failed: status=ERROR, comp-addr=0x55eab7eb0760, comp-name="pretty", comp-log-level=WARNING, comp-class-type=SINK, comp-class-name="pretty", comp-class-partial-descr="Pretty-print messages (`text` fo", comp-class-is-frozen=1, comp-class-so-handle-addr=0x55eab7ebd910, comp-class-so-handle-path="/root/workspace/joraj_integration_base_job/deps-64/build/lib/babeltrace2/plugins/babeltrace-plugin-text.so", comp-input-port-count=1, comp-output-port-count=0 02-28 16:55:03.262 32362 32362 E CLI cmd_run@babeltrace2.c:2548 Graph failed to complete successfully ERROR: [Babeltrace CLI] (babeltrace2.c:2548) Graph failed to complete successfully CAUSED BY [libbabeltrace2] (graph.c:473) Component's "consume" method failed: status=ERROR, comp-addr=0x55eab7eb0760, comp-name="pretty", comp-log-level=WARNING, comp-class-type=SINK, comp-class-name="pretty", comp-class-partial-descr="Pretty-print messages (`text` fo", comp-class-is-frozen=1, comp-class-so-handle-addr=0x55eab7ebd910, comp-class-so-handle-path="/root/workspace/joraj_integration_base_job/deps-64/build/lib/babeltrace2/plugins/babeltrace-plugin-text.so", comp-input-port-count=1, comp-output-port-count=0 CAUSED BY [libbabeltrace2] (iterator.c:864) Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb0fb0, iter-upstream-comp-name="muxer", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=FILTER, iter-upstream-comp-class-name="muxer", iter-upstream-comp-class-partial-descr="Sort messages from multiple inpu", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR CAUSED BY [muxer: 'filter.utils.muxer'] (muxer.c:991) Cannot validate muxer's upstream message iterator wrapper: muxer-msg-iter-addr=0x55eab7eb1120, muxer-upstream-msg-iter-wrap-addr=0x55eab7eb3a70 CAUSED BY [muxer: 'filter.utils.muxer'] (muxer.c:454) Upstream iterator's next method returned an error: status=ERROR CAUSED BY [libbabeltrace2] (iterator.c:864) Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb1170, iter-upstream-comp-name="lttng-live", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=SOURCE, iter-upstream-comp-class-name="lttng-live", iter-upstream-comp-class-partial-descr="Connect to an LTTng relay daemon", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (lttng-live.c:1665) Error preparing the next batch of messages: live-iter-status=LTTNG_LIVE_ITERATOR_STATUS_ERROR CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (viewer-connection.c:1706) Error receiving get new streams reply CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (viewer-connection.c:198) Remote side has closed connection Looking at the relay daemon logs, we see the following error: DBG1 - 16:55:03.262106718 [32139/32146]: Adding new file "ust/pid/gen-ust-events-32373-20220228-165503/chan_0" to trace chunk "(unnamed)" (in lttng_trace_chunk_add_file() at trace-chunk.cpp:1310) PERROR - 16:55:03.262133333 [32139/32146]: Failed to open fs handle to ust/pid/gen-ust-events-32373-20220228-165503/chan_0, open() returned: No such file or directory (in fd_tracker_open_fs_handle() at fd-tracker/fd-tracker.cpp:548) Cause ===== Adding more debugging logging allows us to see the following situation takes place: - relay thread: Create trace chunk on session 1. - live thread: get new streams against session 1, returns NO_NEW_STREAMS since the session has an 'ongoing_rotation'. - live thread: get new streams against session 2, sees no rotation ongoing and attempts to open `chan_0` when creating a viewer stream The "ongoing rotation" check was introduced in a7ceb342d and, in a nutshell, prevents live viewers from creating new viewer streams during a rotation. The "ongoing rotation" state is entered when a CREATE_NEW_TRACE_CHUNK command is issued against a session. However, this presumes that a relay_session maps 1:1 to a session on the session daemon's end. This isn't the case as, in multi-domain scenarios (tracing 32-bit, 64-bit, and kernel events), a single session daemon session can map to multiple relay_session objects. This is because the consumer daemons maintain independant connections to the relay daemon. To synchronize rotations accross related relay_session instances, the relay daemon uses the same trace chunk instances accross relay_session instances. This means that while a trace chunk is created against a specific relay session, it can be used by other relay_session instances. To manage shared trace chunks between relay_sessions, the relay daemon makes use of the trace_chunk_registry. This registry allows relay_sessions to share trace chunk instances using a unique key tuple: - session daemon instance uuid, - session daemon session id, - trace chunk id. There is no equivalent mechanism to track the "ongoing_rotation" state accross relay_sessions originating from the same sessiond session. In the current scenario, this causes the live client to correctly see that no new streams are available for session 1 (say, the 32-bit user space session). Unfortunately, this state is not entered for other sessions (64-bit and kernel relay sessions). Hence, the viewer succeds in acquiring new streams from session 2, exposing the race the 'ongoing rotation' state aims to protect against. Solution ======== Like the trace chunk instances, the "ongoing rotation" state must be shared accross relay sessions that originate from the same session daemon session. To "emulate" this shared state, session_has_ongoing_rotation() checks if any relay session originating from the same sessiond session have an ongoing rotation. If it is the case, we temporarily prevent live viewers from acquiring new streams. Known drawbacks =============== session_has_ongoing_rotation() iterates over all sessions, acquiring their lock in the process, which is certainly undesirable from a performance standpoint. Optimizing this is not a great challenge, but is beyond the scope of this immediate fix. Signed-off-by: Jérémie Galarneau Change-Id: I457a32fa497d151ca933c25433c80665268a7c1c --- src/bin/lttng-relayd/live.c | 8 ++-- src/bin/lttng-relayd/session.c | 85 ++++++++++++++++++++++++++++++++++ src/bin/lttng-relayd/session.h | 4 +- 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index 11e2f799d..7de33d561 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -374,7 +374,7 @@ static int make_viewer_streams(struct relay_session *relay_session, * chunk can be used safely. */ if ((relay_stream->ongoing_rotation.is_set || - relay_session->ongoing_rotation) && + session_has_ongoing_rotation(relay_session)) && relay_stream->trace_chunk) { viewer_stream_trace_chunk = lttng_trace_chunk_copy( relay_stream->trace_chunk); @@ -1170,7 +1170,7 @@ int viewer_get_new_streams(struct relay_connection *conn) * stream, because the chunk can be in an intermediate state * due to directory renaming. */ - if (session->ongoing_rotation) { + if (session_has_ongoing_rotation(session)) { DBG("Relay session %" PRIu64 " rotation ongoing", session_id); response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW); goto send_reply_unlock; @@ -1320,7 +1320,7 @@ int viewer_attach_session(struct relay_connection *conn) * stream, because the chunk can be in an intermediate state * due to directory renaming. */ - if (session->ongoing_rotation) { + if (session_has_ongoing_rotation(session)) { DBG("Relay session %" PRIu64 " rotation ongoing", session_id); send_streams = 0; goto send_reply; @@ -1653,7 +1653,7 @@ int viewer_get_next_index(struct relay_connection *conn) goto send_reply; } - if (rstream->trace->session->ongoing_rotation) { + if (session_has_ongoing_rotation(rstream->trace->session)) { /* Rotation is ongoing, try again later. */ viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY); goto send_reply; diff --git a/src/bin/lttng-relayd/session.c b/src/bin/lttng-relayd/session.c index 6c30821ff..6b98431d1 100644 --- a/src/bin/lttng-relayd/session.c +++ b/src/bin/lttng-relayd/session.c @@ -474,6 +474,91 @@ end: return session; } +/* + * Check if any of the relay sessions originating from the same + * session daemon session have the 'ongoing_rotation' state set. + * + * The caller must hold the lock of session. + */ +bool session_has_ongoing_rotation(const struct relay_session *session) +{ + bool ongoing_rotation = false; + struct lttng_ht_iter iter; + struct relay_session *iterated_session; + + ASSERT_LOCKED(session->lock); + + if (!session->id_sessiond.is_set) { + /* + * The peer that created this session is too old to + * support rotations; we can assume no rotations are ongoing. + */ + goto end; + } + + if (session->ongoing_rotation) { + ongoing_rotation = true; + goto end; + } + + rcu_read_lock(); + /* + * Sample the 'ongoing_rotation' status of all relay sessions that + * originate from the same session daemon session. + */ + cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, iterated_session, + session_n.node) { + if (!session_get(iterated_session)) { + continue; + } + + if (session == iterated_session) { + /* Skip this session. */ + goto next_session_no_unlock; + } + + pthread_mutex_lock(&iterated_session->lock); + + if (!iterated_session->id_sessiond.is_set) { + /* + * Session belongs to a peer that doesn't support + * rotations. + */ + goto next_session; + } + + if (!lttng_uuid_is_equal(session->sessiond_uuid, + iterated_session->sessiond_uuid)) { + /* Sessions do not originate from the same sessiond. */ + goto next_session; + } + + if (LTTNG_OPTIONAL_GET(session->id_sessiond) != + LTTNG_OPTIONAL_GET(iterated_session->id_sessiond)) { + /* + * Sessions do not originate from the same sessiond + * session. + */ + goto next_session; + } + + ongoing_rotation = iterated_session->ongoing_rotation; + +next_session: + pthread_mutex_unlock(&iterated_session->lock); +next_session_no_unlock: + session_put(iterated_session); + + if (ongoing_rotation) { + break; + } + } + rcu_read_unlock(); + +end: + return ongoing_rotation; +} + static void rcu_destroy_session(struct rcu_head *rcu_head) { struct relay_session *session = diff --git a/src/bin/lttng-relayd/session.h b/src/bin/lttng-relayd/session.h index 3a9b92436..8029dcd8e 100644 --- a/src/bin/lttng-relayd/session.h +++ b/src/bin/lttng-relayd/session.h @@ -73,7 +73,7 @@ struct relay_session { struct urcu_ref ref; - pthread_mutex_t lock; + mutable pthread_mutex_t lock; /* major/minor version used for this session. */ uint32_t major; @@ -156,6 +156,8 @@ void session_put(struct relay_session *session); int session_close(struct relay_session *session); int session_abort(struct relay_session *session); +bool session_has_ongoing_rotation(const struct relay_session *session); + void print_sessions(void); #endif /* _SESSION_H */ -- 2.34.1