Fix: sessiond: bad fd used while rotating exiting app's buffers
[lttng-tools.git] / src / bin / lttng-sessiond / ust-app.c
index c5944ac4c7a0cc6feeb59c2220ae73617aa08333..b0e9982de80c44f415bc617c82bc8451ae8971da 100644 (file)
@@ -116,14 +116,12 @@ static int ht_match_ust_app_event(struct cds_lfht_node *node, const void *_key)
 {
        struct ust_app_event *event;
        const struct ust_app_ht_key *key;
-       int ev_loglevel_value;
 
        assert(node);
        assert(_key);
 
        event = caa_container_of(node, struct ust_app_event, node.node);
        key = _key;
-       ev_loglevel_value = event->attr.loglevel;
 
        /* Match the 4 elements of the key: name, filter, loglevel, exclusions */
 
@@ -133,19 +131,10 @@ static int ht_match_ust_app_event(struct cds_lfht_node *node, const void *_key)
        }
 
        /* Event loglevel. */
-       if (ev_loglevel_value != key->loglevel_type) {
-               if (event->attr.loglevel_type == LTTNG_UST_ABI_LOGLEVEL_ALL
-                               && key->loglevel_type == 0 &&
-                               ev_loglevel_value == -1) {
-                       /*
-                        * Match is accepted. This is because on event creation, the
-                        * loglevel is set to -1 if the event loglevel type is ALL so 0 and
-                        * -1 are accepted for this loglevel type since 0 is the one set by
-                        * the API when receiving an enable event.
-                        */
-               } else {
-                       goto no_match;
-               }
+       if (!loglevels_match(event->attr.loglevel_type, event->attr.loglevel,
+                           key->loglevel_type, key->loglevel_value,
+                           LTTNG_UST_ABI_LOGLEVEL_ALL)) {
+               goto no_match;
        }
 
        /* One of the filters is NULL, fail. */
@@ -202,7 +191,9 @@ static void add_unique_ust_app_event(struct ust_app_channel *ua_chan,
        ht = ua_chan->events;
        key.name = event->attr.name;
        key.filter = event->filter;
-       key.loglevel_type = event->attr.loglevel;
+       key.loglevel_type = (enum lttng_ust_abi_loglevel_type)
+                                           event->attr.loglevel_type;
+       key.loglevel_value = event->attr.loglevel;
        key.exclusion = event->exclusion;
 
        node_ptr = cds_lfht_add_unique(ht->ht,
@@ -1499,7 +1490,9 @@ error:
  * Return an ust_app_event object or NULL on error.
  */
 static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht,
-               const char *name, const struct lttng_bytecode *filter,
+               const char *name,
+               const struct lttng_bytecode *filter,
+               enum lttng_ust_abi_loglevel_type loglevel_type,
                int loglevel_value,
                const struct lttng_event_exclusion *exclusion)
 {
@@ -1514,7 +1507,8 @@ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht,
        /* Setup key for event lookup. */
        key.name = name;
        key.filter = filter;
-       key.loglevel_type = loglevel_value;
+       key.loglevel_type = loglevel_type;
+       key.loglevel_value = loglevel_value;
        /* lttng_event_exclusion and lttng_ust_event_exclusion structures are similar */
        key.exclusion = exclusion;
 
@@ -2151,7 +2145,7 @@ static int init_ust_event_notifier_from_event_rule(
 
        event_notifier->event.instrumentation = LTTNG_UST_ABI_TRACEPOINT;
        ret = lttng_strncpy(event_notifier->event.name, pattern,
-                       LTTNG_UST_ABI_SYM_NAME_LEN - 1);
+                       sizeof(event_notifier->event.name));
        if (ret) {
                ERR("Failed to copy event rule pattern to notifier: pattern = '%s' ",
                                pattern);
@@ -3317,7 +3311,7 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *buf_reg_chan,
        /* Send all streams to application. */
        pthread_mutex_lock(&buf_reg_chan->stream_list_lock);
        cds_list_for_each_entry(reg_stream, &buf_reg_chan->streams, lnode) {
-               struct ust_app_stream stream;
+               struct ust_app_stream stream = {};
 
                ret = duplicate_stream_object(reg_stream, &stream);
                if (ret < 0) {
@@ -3334,8 +3328,8 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *buf_reg_chan,
                                 * Treat this the same way as an application
                                 * that is exiting.
                                 */
-                               WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".",
-                                               app->pid, stream.name,
+                               WARN("Communication with application %d timed out on send_stream for stream of channel \"%s\" of session \"%" PRIu64 "\".",
+                                               app->pid,
                                                ua_chan->name,
                                                ua_sess->tracing_id);
                                ret = -ENOTCONN;
@@ -3989,6 +3983,8 @@ struct ust_app *ust_app_create(struct ust_register_msg *msg, int sock)
                goto error_free_pipe;
        }
 
+       urcu_ref_init(&lta->ref);
+
        lta->event_notifier_group.event_pipe = event_notifier_event_source_pipe;
 
        lta->ppid = msg->ppid;
@@ -4227,49 +4223,32 @@ error:
        return ret;
 }
 
-/*
- * Unregister app by removing it from the global traceable app list and freeing
- * the data struct.
- *
- * The socket is already closed at this point so no close to sock.
- */
-void ust_app_unregister(int sock)
+static void ust_app_unregister(struct ust_app *app)
 {
-       struct ust_app *lta;
-       struct lttng_ht_node_ulong *node;
-       struct lttng_ht_iter ust_app_sock_iter;
+       int ret;
        struct lttng_ht_iter iter;
        struct ust_app_session *ua_sess;
-       int ret;
 
        rcu_read_lock();
 
-       /* Get the node reference for a call_rcu */
-       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter);
-       node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter);
-       assert(node);
-
-       lta = caa_container_of(node, struct ust_app, sock_n);
-       DBG("PID %d unregistering with sock %d", lta->pid, sock);
-
        /*
         * For per-PID buffers, perform "push metadata" and flush all
         * application streams before removing app from hash tables,
         * ensuring proper behavior of data_pending check.
         * Remove sessions so they are not visible during deletion.
         */
-       cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess,
+       cds_lfht_for_each_entry(app->sessions->ht, &iter.iter, ua_sess,
                        node.node) {
                struct ust_registry_session *registry;
 
-               ret = lttng_ht_del(lta->sessions, &iter);
+               ret = lttng_ht_del(app->sessions, &iter);
                if (ret) {
                        /* The session was already removed so scheduled for teardown. */
                        continue;
                }
 
                if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) {
-                       (void) ust_app_flush_app_session(lta, ua_sess);
+                       (void) ust_app_flush_app_session(app, ua_sess);
                }
 
                /*
@@ -4310,41 +4289,63 @@ void ust_app_unregister(int sock)
                                (void) close_metadata(registry, ua_sess->consumer);
                        }
                }
-               cds_list_add(&ua_sess->teardown_node, &lta->teardown_head);
 
+               cds_list_add(&ua_sess->teardown_node, &app->teardown_head);
                pthread_mutex_unlock(&ua_sess->lock);
        }
 
-       /* Remove application from PID hash table */
-       ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter);
-       assert(!ret);
-
        /*
         * Remove application from notify hash table. The thread handling the
         * notify socket could have deleted the node so ignore on error because
         * either way it's valid. The close of that socket is handled by the
         * apps_notify_thread.
         */
-       iter.iter.node = &lta->notify_sock_n.node;
+       iter.iter.node = &app->notify_sock_n.node;
        (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
 
-       /*
-        * Ignore return value since the node might have been removed before by an
-        * add replace during app registration because the PID can be reassigned by
-        * the OS.
-        */
-       iter.iter.node = &lta->pid_n.node;
+       iter.iter.node = &app->pid_n.node;
        ret = lttng_ht_del(ust_app_ht, &iter);
        if (ret) {
-               DBG3("Unregister app by PID %d failed. This can happen on pid reuse",
-                               lta->pid);
+               WARN("Unregister app by PID %d failed", app->pid);
        }
 
-       /* Free memory */
-       call_rcu(&lta->pid_n.head, delete_ust_app_rcu);
+       rcu_read_unlock();
+}
 
+/*
+ * Unregister the app registered under 'sock': remove it from the socket hash
+ * table and release the socket's reference to it (see ust_app_put()).
+ *
+ * The socket is already closed at this point, so there is no need to close it.
+ */
+void ust_app_unregister_by_socket(int sock)
+{
+       struct ust_app *app;
+       struct lttng_ht_node_ulong *node;
+       struct lttng_ht_iter ust_app_sock_iter;
+       int ret;
+
+       rcu_read_lock();
+
+       /* Look up the application's node by socket; it is asserted to exist. */
+       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter);
+       node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter);
+       assert(node);
+
+       app = caa_container_of(node, struct ust_app, sock_n);
+
+       DBG("PID %d unregistering with sock %d", app->pid, sock);
+
+       /* Remove application from socket hash table */
+       ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter);
+       assert(!ret);
+
+       /*
+        * The socket is closed: release its reference to the application
+        * to trigger its eventual teardown.
+        */
+       ust_app_put(app);
        rcu_read_unlock();
-       return;
 }
 
 /*
@@ -4640,25 +4641,18 @@ void ust_app_clean_list(void)
                         * are unregistered prior to this clean-up.
                         */
                        assert(lttng_ht_get_count(app->token_to_event_notifier_rule_ht) == 0);
-
                        ust_app_notify_sock_unregister(app->notify_sock);
                }
        }
 
-       if (ust_app_ht) {
-               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
-                       ret = lttng_ht_del(ust_app_ht, &iter);
-                       assert(!ret);
-                       call_rcu(&app->pid_n.head, delete_ust_app_rcu);
-               }
-       }
-
        /* Cleanup socket hash table */
        if (ust_app_ht_by_sock) {
                cds_lfht_for_each_entry(ust_app_ht_by_sock->ht, &iter.iter, app,
                                sock_n.node) {
                        ret = lttng_ht_del(ust_app_ht_by_sock, &iter);
                        assert(!ret);
+
+                       ust_app_put(app);
                }
        }
 
@@ -4841,9 +4835,11 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess,
                }
                ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node);
 
-               ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name,
-                               uevent->filter, uevent->attr.loglevel,
-                               uevent->exclusion);
+               ua_event = find_ust_app_event(ua_chan->events,
+                               uevent->attr.name, uevent->filter,
+                               (enum lttng_ust_abi_loglevel_type)
+                                               uevent->attr.loglevel_type,
+                               uevent->attr.loglevel, uevent->exclusion);
                if (ua_event == NULL) {
                        DBG2("Event %s not found in channel %s for app pid %d."
                                        "Skipping", uevent->attr.name, uchan->name, app->pid);
@@ -5001,8 +4997,11 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess,
                ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node);
 
                /* Get event node */
-               ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name,
-                               uevent->filter, uevent->attr.loglevel, uevent->exclusion);
+               ua_event = find_ust_app_event(ua_chan->events,
+                               uevent->attr.name, uevent->filter,
+                               (enum lttng_ust_abi_loglevel_type)
+                                               uevent->attr.loglevel_type,
+                               uevent->attr.loglevel, uevent->exclusion);
                if (ua_event == NULL) {
                        DBG3("UST app enable event %s not found for app PID %d."
                                        "Skipping app", uevent->attr.name, app->pid);
@@ -5776,7 +5775,10 @@ int ust_app_channel_synchronize_event(struct ust_app_channel *ua_chan,
        struct ust_app_event *ua_event = NULL;
 
        ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name,
-               uevent->filter, uevent->attr.loglevel, uevent->exclusion);
+                       uevent->filter,
+                       (enum lttng_ust_abi_loglevel_type)
+                                       uevent->attr.loglevel_type,
+                       uevent->attr.loglevel, uevent->exclusion);
        if (!ua_event) {
                ret = create_ust_app_event(ua_sess, ua_chan, uevent, app);
                if (ret < 0) {
@@ -6323,6 +6325,104 @@ error:
        return ua_chan;
 }
 
+/*
+ * Fixup legacy context fields for comparison: legacy array, struct and
+ * variant become array_nestable, struct_nestable and variant_nestable;
+ * legacy sequences are not emitted in LTTng-UST contexts (-EINVAL).
+ * If a legacy field is found, *_fields is freed and replaced by a new array
+ * and *_nr_fields updated; else both are untouched. Returns 0, or < 0 on error.
+ */
+static int ust_app_fixup_legacy_context_fields(size_t *_nr_fields,
+               struct lttng_ust_ctl_field **_fields)
+{
+       struct lttng_ust_ctl_field *fields = *_fields, *new_fields = NULL;
+       size_t nr_fields = *_nr_fields, new_nr_fields = 0, i, j;
+       bool found = false;     /* Set when a legacy array/struct/variant is seen. */
+       int ret = 0;
+
+       for (i = 0; i < nr_fields; i++) {       /* Pass 1: validate and count output fields. */
+               const struct lttng_ust_ctl_field *field = &fields[i];
+
+               switch (field->type.atype) {
+               case lttng_ust_ctl_atype_sequence:
+                       ERR("Unexpected legacy sequence context.");
+                       ret = -EINVAL;
+                       goto end;
+               case lttng_ust_ctl_atype_array:
+                       switch (field->type.u.legacy.array.elem_type.atype) {
+                       case lttng_ust_ctl_atype_integer:
+                               break;
+                       default:
+                               ERR("Unexpected legacy array element type in context.");
+                               ret = -EINVAL;
+                               goto end;
+                       }
+                       found = true;
+                       /* One field for array_nestable, one field for elem type. */
+                       new_nr_fields += 2;
+                       break;
+
+               case lttng_ust_ctl_atype_struct:        /* Fallthrough */
+               case lttng_ust_ctl_atype_variant:
+                       found = true;
+                       new_nr_fields++;
+                       break;
+               default:
+                       new_nr_fields++;
+                       break;
+               }
+       }
+       if (!found) {   /* Nothing to convert: leave the caller's array as is. */
+               goto end;
+       }
+       new_fields = (struct lttng_ust_ctl_field *) zmalloc(sizeof(*new_fields) * new_nr_fields);
+       if (!new_fields) {
+               ret = -ENOMEM;
+               goto end;
+       }
+       for (i = 0, j = 0; i < nr_fields; i++, j++) {   /* Pass 2: j advances twice for an array. */
+               const struct lttng_ust_ctl_field *field = &fields[i];
+               struct lttng_ust_ctl_field *new_field = &new_fields[j];
+
+               switch (field->type.atype) {
+               case lttng_ust_ctl_atype_array:
+                       /* One field for array_nestable, one field for elem type. */
+                       strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1);
+                       new_field->type.atype = lttng_ust_ctl_atype_array_nestable;
+                       new_field->type.u.array_nestable.length = field->type.u.legacy.array.length;
+                       new_field->type.u.array_nestable.alignment = 0;
+                       new_field = &new_fields[++j];   /* elem type */
+                       new_field->type.atype = field->type.u.legacy.array.elem_type.atype;
+                       assert(new_field->type.atype == lttng_ust_ctl_atype_integer);
+                       new_field->type.u.integer = field->type.u.legacy.array.elem_type.u.basic.integer;
+                       break;
+               case lttng_ust_ctl_atype_struct:
+                       strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1);
+                       new_field->type.atype = lttng_ust_ctl_atype_struct_nestable;
+                       new_field->type.u.struct_nestable.nr_fields = field->type.u.legacy._struct.nr_fields;
+                       new_field->type.u.struct_nestable.alignment = 0;
+                       break;
+               case lttng_ust_ctl_atype_variant:
+                       strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1);
+                       new_field->type.atype = lttng_ust_ctl_atype_variant_nestable;
+                       new_field->type.u.variant_nestable.nr_choices = field->type.u.legacy.variant.nr_choices;
+                       strncpy(new_field->type.u.variant_nestable.tag_name,
+                               field->type.u.legacy.variant.tag_name,
+                               LTTNG_UST_ABI_SYM_NAME_LEN - 1);
+                       new_field->type.u.variant_nestable.alignment = 0;
+                       break;
+               default:
+                       *new_field = *field;
+                       break;
+               }
+       }
+       free(fields);   /* The input array is replaced by new_fields below. */
+       *_fields = new_fields;
+       *_nr_fields = new_nr_fields;
+end:
+       return ret;
+}
+
 /*
  * Reply to a register channel notification from an application on the notify
  * socket. The channel metadata is also created.
@@ -6337,7 +6437,7 @@ static int reply_ust_register_channel(int sock, int cobjd,
        int ret, ret_code = 0;
        uint32_t chan_id;
        uint64_t chan_reg_key;
-       enum lttng_ust_ctl_channel_header type;
+       enum lttng_ust_ctl_channel_header type = LTTNG_UST_CTL_CHANNEL_HEADER_UNKNOWN;
        struct ust_app *app;
        struct ust_app_channel *ua_chan;
        struct ust_app_session *ua_sess;
@@ -6389,6 +6489,13 @@ static int reply_ust_register_channel(int sock, int cobjd,
        /* Channel id is set during the object creation. */
        chan_id = ust_reg_chan->chan_id;
 
+       ret = ust_app_fixup_legacy_context_fields(&nr_fields, &fields);
+       if (ret < 0) {
+               ERR("Registering application channel due to legacy context fields fixup error: pid = %d, sock = %d",
+                       app->pid, app->sock);
+               ret_code = -EINVAL;
+               goto reply;
+       }
        if (!ust_reg_chan->register_done) {
                /*
                 * TODO: eventually use the registry event count for
@@ -6883,7 +6990,7 @@ close_socket:
 /*
  * Destroy a ust app data structure and free its memory.
  */
-void ust_app_destroy(struct ust_app *app)
+static void ust_app_destroy(struct ust_app *app)
 {
        if (!app) {
                return;
@@ -7310,7 +7417,7 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
        int ret;
        enum lttng_error_code cmd_ret = LTTNG_OK;
        struct lttng_ht_iter iter;
-       struct ust_app *app;
+       struct ust_app *app = NULL;
        struct ltt_ust_session *usess = session->ust_session;
 
        assert(usess);
@@ -7383,10 +7490,20 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                        struct ust_app_channel *ua_chan;
                        struct ust_app_session *ua_sess;
                        struct ust_registry_session *registry;
+                       bool app_reference_taken;
+
+                       app_reference_taken = ust_app_get(app);
+                       if (!app_reference_taken) {
+                               /* Application unregistered concurrently, skip it. */
+                               DBG("Could not get application reference as it is being torn down; skipping application");
+                               continue;
+                       }
 
                        ua_sess = lookup_session_by_app(usess, app);
                        if (!ua_sess) {
                                /* Session not associated with this app. */
+                               ust_app_put(app);
+                               app = NULL;
                                continue;
                        }
 
@@ -7398,11 +7515,9 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                goto error;
                        }
 
+
                        registry = get_session_registry(ua_sess);
-                       if (!registry) {
-                               DBG("Application session is being torn down. Skip application.");
-                               continue;
-                       }
+                       assert(registry);
 
                        /* Rotate the data channels. */
                        cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
@@ -7414,9 +7529,6 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                                ua_sess->consumer,
                                                /* is_metadata_channel */ false);
                                if (ret < 0) {
-                                       /* Per-PID buffer and application going away. */
-                                       if (ret == -LTTNG_ERR_CHAN_NOT_FOUND)
-                                               continue;
                                        cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER;
                                        goto error;
                                }
@@ -7431,13 +7543,15 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
                                        ua_sess->consumer,
                                        /* is_metadata_channel */ true);
                        if (ret < 0) {
-                               /* Per-PID buffer and application going away. */
-                               if (ret == -LTTNG_ERR_CHAN_NOT_FOUND)
-                                       continue;
                                cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER;
                                goto error;
                        }
+
+                       ust_app_put(app);
+                       app = NULL;
                }
+
+               app = NULL;
                break;
        }
        default:
@@ -7448,6 +7562,7 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session)
        cmd_ret = LTTNG_OK;
 
 error:
+       ust_app_put(app);
        rcu_read_unlock();
        return cmd_ret;
 }
@@ -7829,3 +7944,26 @@ error:
        rcu_read_unlock();
        return ret;
 }
+
+static void ust_app_release(struct urcu_ref *ref)
+{
+       struct ust_app *app = container_of(ref, struct ust_app, ref);
+       /* Release callback handed to urcu_ref_put() by ust_app_put(). */
+       ust_app_unregister(app);
+       ust_app_destroy(app);
+}
+
+bool ust_app_get(struct ust_app *app)
+{
+       assert(app);    /* Unlike ust_app_put(), NULL is not accepted here. */
+       return urcu_ref_get_unless_zero(&app->ref);     /* false: no reference was taken */
+}
+
+void ust_app_put(struct ust_app *app)
+{
+       if (!app) {     /* A NULL app is tolerated: nothing to release. */
+               return;
+       }
+       /* Drop one reference; ust_app_release() is the release callback. */
+       urcu_ref_put(&app->ref, ust_app_release);
+}
This page took 0.030402 seconds and 4 git commands to generate.