X-Git-Url: https://git.liburcu.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fust-app.c;h=b0e9982de80c44f415bc617c82bc8451ae8971da;hb=3faa1e3d9bb6b6cd1fa370c206614af9061815be;hp=c00dd2877fd08d1aed942ac1fce52ab4ff25d3c8;hpb=27a3be486f8cc9364d4861b7d163ad3e1426e9b3;p=lttng-tools.git diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index c00dd2877..b0e9982de 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 David Goulet + * Copyright (C) 2011 EfficiOS Inc. * Copyright (C) 2016 Jérémie Galarneau * * SPDX-License-Identifier: GPL-2.0-only @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ #include "rotate.h" #include "event.h" #include "event-notifier-error-accounting.h" - +#include "ust-field-utils.h" struct lttng_ht *ust_app_ht; struct lttng_ht *ust_app_ht_by_sock; @@ -116,14 +116,12 @@ static int ht_match_ust_app_event(struct cds_lfht_node *node, const void *_key) { struct ust_app_event *event; const struct ust_app_ht_key *key; - int ev_loglevel_value; assert(node); assert(_key); event = caa_container_of(node, struct ust_app_event, node.node); key = _key; - ev_loglevel_value = event->attr.loglevel; /* Match the 4 elements of the key: name, filter, loglevel, exclusions */ @@ -133,19 +131,10 @@ static int ht_match_ust_app_event(struct cds_lfht_node *node, const void *_key) } /* Event loglevel. */ - if (ev_loglevel_value != key->loglevel_type) { - if (event->attr.loglevel_type == LTTNG_UST_ABI_LOGLEVEL_ALL - && key->loglevel_type == 0 && - ev_loglevel_value == -1) { - /* - * Match is accepted. This is because on event creation, the - * loglevel is set to -1 if the event loglevel type is ALL so 0 and - * -1 are accepted for this loglevel type since 0 is the one set by - * the API when receiving an enable event. - */ - } else { - goto no_match; - } + if (!loglevels_match(event->attr.loglevel_type, event->attr.loglevel, + key->loglevel_type, key->loglevel_value, + LTTNG_UST_ABI_LOGLEVEL_ALL)) { + goto no_match; } /* One of the filters is NULL, fail. */ @@ -202,7 +191,9 @@ static void add_unique_ust_app_event(struct ust_app_channel *ua_chan, ht = ua_chan->events; key.name = event->attr.name; key.filter = event->filter; - key.loglevel_type = event->attr.loglevel; + key.loglevel_type = (enum lttng_ust_abi_loglevel_type) + event->attr.loglevel_type; + key.loglevel_value = event->attr.loglevel; key.exclusion = event->exclusion; node_ptr = cds_lfht_add_unique(ht->ht, @@ -292,9 +283,18 @@ void delete_ust_app_ctx(int sock, struct ust_app_ctx *ua_ctx, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_object(sock, ua_ctx->obj); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app sock %d release ctx obj handle %d failed with ret %d", - sock, ua_ctx->obj->handle, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app release ctx failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app release ctx failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app release ctx obj handle %d failed with ret %d: pid = %d, sock = %d", + ua_ctx->obj->handle, ret, + app->pid, app->sock); + } } free(ua_ctx->obj); } @@ -320,9 +320,17 @@ void delete_ust_app_event(int sock, struct ust_app_event *ua_event, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_object(sock, ua_event->obj); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app sock %d release event obj failed with ret %d", - sock, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app release event failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app release event failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app release event obj failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); + } } free(ua_event->obj); } @@ -361,9 +369,17 @@ static void delete_ust_app_event_notifier_rule(int sock, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_object(sock, ua_event_notifier_rule->obj); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Failed to release event notifier object: app = '%s' (ppid %d), ret = %d", - app->name, (int) app->ppid, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app release event notifier failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app release event notifier failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app release event notifier failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); + } } free(ua_event_notifier_rule->obj); @@ -390,9 +406,17 @@ static int release_ust_app_stream(int sock, struct ust_app_stream *stream, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_object(sock, stream->obj); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app sock %d release stream obj failed with ret %d", - sock, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app release stream failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app release stream failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app release stream obj failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); + } } lttng_fd_put(LTTNG_FD_APPS, 2); free(stream->obj); @@ -566,9 +590,20 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_object(sock, ua_chan->obj); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app sock %d release channel obj failed with ret %d", - sock, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app channel %s release failed. Application is dead: pid = %d, sock = %d", + ua_chan->name, app->pid, + app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app channel %s release failed. Communication time out: pid = %d, sock = %d", + ua_chan->name, app->pid, + app->sock); + } else { + ERR("UST app channel %s release failed with ret %d: pid = %d, sock = %d", + ua_chan->name, ret, app->pid, + app->sock); + } } lttng_fd_put(LTTNG_FD_APPS, 1); free(ua_chan->obj); @@ -931,10 +966,19 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess, pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_release_handle(sock, ua_sess->handle); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app sock %d release session handle failed with ret %d", - sock, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app release session handle failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app release session handle failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app release session handle failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); + } } + /* Remove session from application UST object descriptor. */ iter.iter.node = &ua_sess->ust_objd_node.node; ret = lttng_ht_del(app->ust_sessions_objd, &iter); @@ -1249,6 +1293,7 @@ static struct ust_app_event_notifier_rule *alloc_ust_app_event_notifier_rule( { enum lttng_event_rule_generate_exclusions_status generate_exclusion_status; + enum lttng_condition_status cond_status; struct ust_app_event_notifier_rule *ua_event_notifier_rule; struct lttng_condition *condition = NULL; const struct lttng_event_rule *event_rule = NULL; @@ -1269,9 +1314,9 @@ static struct ust_app_event_notifier_rule *alloc_ust_app_event_notifier_rule( assert(lttng_condition_get_type(condition) == LTTNG_CONDITION_TYPE_EVENT_RULE_MATCHES); - assert(LTTNG_CONDITION_STATUS_OK == - lttng_condition_event_rule_matches_get_rule( - condition, &event_rule)); + cond_status = lttng_condition_event_rule_matches_get_rule( + condition, &event_rule); + assert(cond_status == LTTNG_CONDITION_STATUS_OK); assert(event_rule); ua_event_notifier_rule->error_counter_index = @@ -1288,7 +1333,7 @@ static struct ust_app_event_notifier_rule *alloc_ust_app_event_notifier_rule( case LTTNG_EVENT_RULE_GENERATE_EXCLUSIONS_STATUS_NONE: break; default: - /* Error occured. */ + /* Error occurred. */ ERR("Failed to generate exclusions from trigger while allocating an event notifier rule"); goto error_put_trigger; } @@ -1445,7 +1490,9 @@ error: * Return an ust_app_event object or NULL on error. */ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht, - const char *name, const struct lttng_bytecode *filter, + const char *name, + const struct lttng_bytecode *filter, + enum lttng_ust_abi_loglevel_type loglevel_type, int loglevel_value, const struct lttng_event_exclusion *exclusion) { @@ -1460,7 +1507,8 @@ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht, /* Setup key for event lookup. */ key.name = name; key.filter = filter; - key.loglevel_type = loglevel_value; + key.loglevel_type = loglevel_type; + key.loglevel_value = loglevel_value; /* lttng_event_exclusion and lttng_ust_event_exclusion structures are similar */ key.exclusion = exclusion; @@ -1525,17 +1573,17 @@ int create_ust_channel_context(struct ust_app_channel *ua_chan, ua_chan->obj, &ua_ctx->obj); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app create channel context failed for app (pid: %d) " - "with ret %d", app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = 0; - DBG3("UST app add context failed. Application is dead."); + DBG3("UST app create channel context failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + WARN("UST app create channel context failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app create channel context failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } goto error; } @@ -1572,17 +1620,17 @@ static int set_ust_object_filter(struct ust_app *app, ust_object); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app set object filter failed: object = %p of app pid = %d, ret = %d", - ust_object, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app set filter failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { ret = 0; - DBG3("Failed to set UST app object filter. Application is dead."); + WARN("UST app set filter failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app set filter failed with ret %d: pid = %d, sock = %d, object = %p", + ret, app->pid, app->sock, ust_object); } goto error; } @@ -1626,17 +1674,18 @@ static int set_ust_capture(struct ust_app *app, ust_object); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app set object capture failed: object = %p of app pid = %d, ret = %d", - ust_object, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = 0; - DBG3("Failed to set UST app object capture. Application is dead."); + DBG3("UST app set capture failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + DBG3("UST app set capture failed. Communication timeout: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app event set capture failed with ret %d: pid = %d, sock = %d", + ret, app->pid, + app->sock); } goto error; @@ -1695,17 +1744,17 @@ static int set_ust_object_exclusions(struct ust_app *app, ret = lttng_ust_ctl_set_exclusion(app->sock, ust_exclusions, ust_object); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Failed to set UST app exclusions for object %p of app (pid: %d) " - "with ret %d", ust_object, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app event exclusion failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { ret = 0; - DBG3("Failed to set UST app object exclusions. Application is dead."); + WARN("UST app event exclusion failed. Communication time out(pid: %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app event exclusions failed with ret %d: pid = %d, sock = %d, object = %p", + ret, app->pid, app->sock, ust_object); } goto error; } @@ -1732,22 +1781,22 @@ static int disable_ust_object(struct ust_app *app, ret = lttng_ust_ctl_disable(app->sock, object); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Failed to disable UST app object %p app (pid: %d) with ret %d", - object, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = 0; - DBG3("Failed to disable UST app object. Application is dead."); + DBG3("UST app disable object failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + WARN("UST app disable object failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app disable object failed with ret %d: pid = %d, sock = %d, object = %p", + ret, app->pid, app->sock, object); } goto error; } - DBG2("UST app object %p disabled successfully for app (pid: %d)", + DBG2("UST app object %p disabled successfully for app: pid = %d", object, app->pid); error: @@ -1769,23 +1818,23 @@ static int disable_ust_channel(struct ust_app *app, ret = lttng_ust_ctl_disable(app->sock, ua_chan->obj); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app channel %s disable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_chan->name, app->pid, ua_sess->handle, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app disable channel failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { ret = 0; - DBG3("UST app disable channel failed. Application is dead."); + WARN("UST app disable channel failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app channel %s disable failed, session handle %d, with ret %d: pid = %d, sock = %d", + ua_chan->name, ua_sess->handle, ret, + app->pid, app->sock); } goto error; } - DBG2("UST app channel %s disabled successfully for app (pid: %d)", + DBG2("UST app channel %s disabled successfully for app: pid = %d", ua_chan->name, app->pid); error: @@ -1807,25 +1856,25 @@ static int enable_ust_channel(struct ust_app *app, ret = lttng_ust_ctl_enable(app->sock, ua_chan->obj); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app channel %s enable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_chan->name, app->pid, ua_sess->handle, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = 0; - DBG3("UST app enable channel failed. Application is dead."); + DBG3("UST app channel %s enable failed. Application is dead: pid = %d, sock = %d", + ua_chan->name, app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + WARN("UST app channel %s enable failed. Communication time out: pid = %d, sock = %d", + ua_chan->name, app->pid, app->sock); + } else { + ERR("UST app channel %s enable failed, session handle %d, with ret %d: pid = %d, sock = %d", + ua_chan->name, ua_sess->handle, ret, + app->pid, app->sock); } goto error; } ua_chan->enabled = 1; - DBG2("UST app channel %s enabled successfully for app (pid: %d)", + DBG2("UST app channel %s enabled successfully for app: pid = %d", ua_chan->name, app->pid); error: @@ -1847,22 +1896,22 @@ static int enable_ust_object( ret = lttng_ust_ctl_enable(app->sock, ust_object); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app enable failed for object %p app (pid: %d) with ret %d", - ust_object, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app enable object failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { ret = 0; - DBG3("Failed to enable UST app object. Application is dead."); + WARN("UST app enable object failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app enable object failed with ret %d: pid = %d, sock = %d, object = %p", + ret, app->pid, app->sock, ust_object); } goto error; } - DBG2("UST app object %p enabled successfully for app (pid: %d)", + DBG2("UST app object %p enabled successfully for app: pid = %d", ust_object, app->pid); error: @@ -1895,6 +1944,13 @@ static int send_channel_pid_to_ust(struct ust_app *app, if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = -ENOTCONN; /* Caused by app exiting. */ goto error; + } else if (ret == -EAGAIN) { + /* Caused by timeout. */ + WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".", + app->pid, ua_chan->name, ua_sess->tracing_id); + /* Treat this the same way as an application that is exiting. */ + ret = -ENOTCONN; + goto error; } else if (ret < 0) { goto error; } @@ -1905,8 +1961,18 @@ static int send_channel_pid_to_ust(struct ust_app *app, cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) { ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream); if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { - ret = -ENOTCONN; /* Caused by app exiting. */ + ret = -ENOTCONN; /* Caused by app exiting. */ goto error; + } else if (ret == -EAGAIN) { + /* Caused by timeout. */ + WARN("Communication with application %d timed out on send_stream for stream \"%s\" of channel \"%s\" of session \"%" PRIu64 "\".", + app->pid, stream->name, ua_chan->name, + ua_sess->tracing_id); + /* + * Treat this the same way as an application that is + * exiting. + */ + ret = -ENOTCONN; } else if (ret < 0) { goto error; } @@ -1941,25 +2007,25 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess, &ua_event->obj); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - abort(); - ERR("Error ustctl create event %s for app pid: %d with ret %d", - ua_event->attr.name, app->pid, ret); - } else { - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app create event failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { ret = 0; - DBG3("UST app create event failed. Application is dead."); + WARN("UST app create event failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app create event '%s' failed with ret %d: pid = %d, sock = %d", + ua_event->attr.name, ret, app->pid, + app->sock); } goto error; } ua_event->handle = ua_event->obj->handle; - DBG2("UST app event %s created successfully for pid:%d object: %p", + DBG2("UST app event %s created successfully for pid:%d object = %p", ua_event->attr.name, app->pid, ua_event->obj); health_code_update(); @@ -2022,9 +2088,6 @@ static int init_ust_event_notifier_from_event_rule( int loglevel = -1, ret = 0; const char *pattern; - /* For now only LTTNG_EVENT_RULE_TYPE_TRACEPOINT are supported. */ - assert(lttng_event_rule_get_type(rule) == - LTTNG_EVENT_RULE_TYPE_TRACEPOINT); memset(event_notifier, 0, sizeof(*event_notifier)); @@ -2042,13 +2105,16 @@ static int init_ust_event_notifier_from_event_rule( } else { const struct lttng_log_level_rule *log_level_rule; - status = lttng_event_rule_tracepoint_get_pattern(rule, &pattern); + assert(lttng_event_rule_get_type(rule) == + LTTNG_EVENT_RULE_TYPE_USER_TRACEPOINT); + + status = lttng_event_rule_user_tracepoint_get_name_pattern(rule, &pattern); if (status != LTTNG_EVENT_RULE_STATUS_OK) { /* At this point, this is a fatal error. */ abort(); } - status = lttng_event_rule_tracepoint_get_log_level_rule( + status = lttng_event_rule_user_tracepoint_get_log_level_rule( rule, &log_level_rule); if (status == LTTNG_EVENT_RULE_STATUS_UNSET) { ust_loglevel_type = LTTNG_UST_ABI_LOGLEVEL_ALL; @@ -2079,7 +2145,7 @@ static int init_ust_event_notifier_from_event_rule( event_notifier->event.instrumentation = LTTNG_UST_ABI_TRACEPOINT; ret = lttng_strncpy(event_notifier->event.name, pattern, - LTTNG_UST_ABI_SYM_NAME_LEN - 1); + sizeof(event_notifier->event.name)); if (ret) { ERR("Failed to copy event rule pattern to notifier: pattern = '%s' ", pattern); @@ -2106,6 +2172,7 @@ static int create_ust_event_notifier(struct ust_app *app, const struct lttng_event_rule *event_rule = NULL; unsigned int capture_bytecode_count = 0, i; enum lttng_condition_status cond_status; + enum lttng_event_rule_type event_rule_type; health_code_update(); assert(app->event_notifier_group.object); @@ -2121,7 +2188,14 @@ static int create_ust_event_notifier(struct ust_app *app, assert(condition_status == LTTNG_CONDITION_STATUS_OK); assert(event_rule); - assert(lttng_event_rule_get_type(event_rule) == LTTNG_EVENT_RULE_TYPE_TRACEPOINT); + + event_rule_type = lttng_event_rule_get_type(event_rule); + assert(event_rule_type == LTTNG_EVENT_RULE_TYPE_USER_TRACEPOINT || + event_rule_type == LTTNG_EVENT_RULE_TYPE_JUL_LOGGING || + event_rule_type == + LTTNG_EVENT_RULE_TYPE_LOG4J_LOGGING || + event_rule_type == + LTTNG_EVENT_RULE_TYPE_PYTHON_LOGGING); init_ust_event_notifier_from_event_rule(event_rule, &event_notifier); event_notifier.event.token = ua_event_notifier_rule->token; @@ -2134,28 +2208,26 @@ static int create_ust_event_notifier(struct ust_app *app, &ua_event_notifier_rule->obj); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Error ustctl create event notifier: name = '%s', app = '%s' (ppid: %d), ret = %d", - event_notifier.event.name, app->name, - app->ppid, ret); - } else { - /* - * This is normal behavior, an application can die - * during the creation process. Don't report an error so - * the execution can continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = 0; - DBG3("UST app create event notifier failed (application is dead): app = '%s' (ppid = %d)", - app->name, app->ppid); + DBG3("UST app create event notifier failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + WARN("UST app create event notifier failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app create event notifier '%s' failed with ret %d: pid = %d, sock = %d", + event_notifier.event.name, ret, app->pid, + app->sock); } - goto error; } ua_event_notifier_rule->handle = ua_event_notifier_rule->obj->handle; - DBG2("UST app event notifier %s created successfully: app = '%s' (ppid: %d), object: %p", - event_notifier.event.name, app->name, app->ppid, + DBG2("UST app event notifier %s created successfully: app = '%s': pid = %d), object = %p", + event_notifier.event.name, app->name, app->pid, ua_event_notifier_rule->obj); health_code_update(); @@ -2631,18 +2703,17 @@ static int find_or_create_ust_app_session(struct ltt_ust_session *usess, ret = lttng_ust_ctl_create_session(app->sock); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Creating session for app pid %d with ret %d", - app->pid, ret); - } else { - DBG("UST app creating session failed. Application is dead"); - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. This will get flagged ENOTCONN and the - * caller will handle it. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG("UST app creating session failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); ret = 0; + } else if (ret == -EAGAIN) { + DBG("UST app creating session failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + ret = 0; + } else { + ERR("UST app creating session failed with ret %d: pid = %d, sock =%d", + ret, app->pid, app->sock); } delete_ust_app_session(-1, ua_sess, app); if (ret != -ENOMEM) { @@ -3005,7 +3076,7 @@ static int duplicate_stream_object(struct buffer_reg_stream *reg_stream, assert(reg_stream); assert(stream); - /* Reserve the amount of file descriptor we need. */ + /* Duplicating a stream requires 2 new fds. Reserve them. */ ret = lttng_fd_get(LTTNG_FD_APPS, 2); if (ret < 0) { ERR("Exhausted number of available FD upon duplicate stream"); @@ -3041,7 +3112,7 @@ static int duplicate_channel_object(struct buffer_reg_channel *buf_reg_chan, assert(buf_reg_chan); assert(ua_chan); - /* Need two fds for the channel. */ + /* Duplicating a channel requires 1 new fd. Reserve it. */ ret = lttng_fd_get(LTTNG_FD_APPS, 1); if (ret < 0) { ERR("Exhausted number of available FD upon duplicate channel"); @@ -3224,6 +3295,13 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *buf_reg_chan, if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = -ENOTCONN; /* Caused by app exiting. */ goto error; + } else if (ret == -EAGAIN) { + /* Caused by timeout. */ + WARN("Communication with application %d timed out on send_channel for channel \"%s\" of session \"%" PRIu64 "\".", + app->pid, ua_chan->name, ua_sess->tracing_id); + /* Treat this the same way as an application that is exiting. */ + ret = -ENOTCONN; + goto error; } else if (ret < 0) { goto error; } @@ -3233,7 +3311,7 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *buf_reg_chan, /* Send all streams to application. */ pthread_mutex_lock(&buf_reg_chan->stream_list_lock); cds_list_for_each_entry(reg_stream, &buf_reg_chan->streams, lnode) { - struct ust_app_stream stream; + struct ust_app_stream stream = {}; ret = duplicate_stream_object(reg_stream, &stream); if (ret < 0) { @@ -3242,10 +3320,21 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *buf_reg_chan, ret = ust_consumer_send_stream_to_ust(app, ua_chan, &stream); if (ret < 0) { - (void) release_ust_app_stream(-1, &stream, app); if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { ret = -ENOTCONN; /* Caused by app exiting. */ + } else if (ret == -EAGAIN) { + /* + * Caused by timeout. + * Treat this the same way as an application + * that is exiting. + */ + WARN("Communication with application %d timed out on send_stream for stream of channel \"%s\" of session \"%" PRIu64 "\".", + app->pid, + ua_chan->name, + ua_sess->tracing_id); + ret = -ENOTCONN; } + (void) release_ust_app_stream(-1, &stream, app); goto error_stream_unlock; } @@ -3640,8 +3729,8 @@ int create_ust_app_event(struct ust_app_session *ua_sess, add_unique_ust_app_event(ua_chan, ua_event); - DBG2("UST app create event completed: app = '%s' (ppid: %d)", - app->name, app->ppid); + DBG2("UST app create event completed: app = '%s' pid = %d", + app->name, app->pid); end: return ret; @@ -3693,8 +3782,8 @@ int create_ust_app_event_notifier_rule(struct lttng_trigger *trigger, lttng_ht_add_unique_u64(app->token_to_event_notifier_rule_ht, &ua_event_notifier_rule->node); - DBG2("UST app create token event rule completed: app = '%s' (ppid: %d), token = %" PRIu64, - app->name, app->ppid, lttng_trigger_get_tracer_token(trigger)); + DBG2("UST app create token event rule completed: app = '%s', pid = %d), token = %" PRIu64, + app->name, app->pid, lttng_trigger_get_tracer_token(trigger)); goto end; @@ -3876,15 +3965,15 @@ struct ust_app *ust_app_create(struct ust_register_msg *msg, int sock) */ ret = lttng_fd_get(LTTNG_FD_APPS, 2); if (ret) { - ERR("Failed to reserve two file descriptors for the event source pipe while creating a new application instance: app = '%s' (ppid: %d)", - msg->name, (int) msg->ppid); + ERR("Failed to reserve two file descriptors for the event source pipe while creating a new application instance: app = '%s', pid = %d", + msg->name, (int) msg->pid); goto error; } event_notifier_event_source_pipe = lttng_pipe_open(FD_CLOEXEC); if (!event_notifier_event_source_pipe) { - PERROR("Failed to open application event source pipe: '%s' (ppid = %d)", - msg->name, msg->ppid); + PERROR("Failed to open application event source pipe: '%s' (pid = %d)", + msg->name, msg->pid); goto error; } @@ -3894,6 +3983,8 @@ struct ust_app *ust_app_create(struct ust_register_msg *msg, int sock) goto error_free_pipe; } + urcu_ref_init(<a->ref); + lta->event_notifier_group.event_pipe = event_notifier_event_source_pipe; lta->ppid = msg->ppid; @@ -3972,8 +4063,8 @@ void ust_app_add(struct ust_app *app) lttng_ht_node_init_ulong(&app->notify_sock_n, app->notify_sock); lttng_ht_add_unique_ulong(ust_app_ht_by_notify_sock, &app->notify_sock_n); - DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s " - "notify_sock:%d (version %d.%d)", app->pid, app->ppid, app->uid, + DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock =%d name:%s " + "notify_sock =%d (version %d.%d)", app->pid, app->ppid, app->uid, app->gid, app->sock, app->name, app->notify_sock, app->v_major, app->v_minor); @@ -3996,10 +4087,15 @@ int ust_app_version(struct ust_app *app) ret = lttng_ust_ctl_tracer_version(app->sock, &app->version); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) { - ERR("UST app %d version failed with ret %d", app->sock, ret); + if (ret == -LTTNG_UST_ERR_EXITING || ret == -EPIPE) { + DBG3("UST app version failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app version failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); } else { - DBG3("UST app %d version failed. Application is dead", app->sock); + ERR("UST app version failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } } @@ -4046,21 +4142,25 @@ int ust_app_setup_event_notifier_group(struct ust_app *app) event_pipe_write_fd, &event_notifier_group); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) { - ERR("Failed to create application event notifier group: ret = %d, app socket fd = %d, event_pipe_write_fd = %d", - ret, app->sock, event_pipe_write_fd); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = 0; + DBG3("UST app create event notifier group failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + ret = 0; + WARN("UST app create event notifier group failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); } else { - DBG("Failed to create application event notifier group (application is dead): app socket fd = %d", - app->sock); + ERR("UST app create event notifier group failed with ret %d: pid = %d, sock = %d, event_pipe_write_fd: %d", + ret, app->pid, app->sock, event_pipe_write_fd); } - goto error; } ret = lttng_pipe_write_close(app->event_notifier_group.event_pipe); if (ret) { - ERR("Failed to close write end of the application's event source pipe: app = '%s' (ppid = %d)", - app->name, app->ppid); + ERR("Failed to close write end of the application's event source pipe: app = '%s' (pid = %d)", + app->name, app->pid); goto error; } @@ -4090,13 +4190,13 @@ int ust_app_setup_event_notifier_group(struct ust_app *app) case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK: break; case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_UNSUPPORTED: - DBG3("Failed to setup event notifier error accounting (application does not support notifier error accounting): app socket fd = %d, app name = '%s', app ppid = %d", - app->sock, app->name, (int) app->ppid); + DBG3("Failed to setup event notifier error accounting (application does not support notifier error accounting): app socket fd = %d, app name = '%s', app pid = %d", + app->sock, app->name, (int) app->pid); ret = 0; goto error_accounting; case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD: - DBG3("Failed to setup event notifier error accounting (application is dead): app socket fd = %d, app name = '%s', app ppid = %d", - app->sock, app->name, (int) app->ppid); + DBG3("Failed to setup event notifier error accounting (application is dead): app socket fd = %d, app name = '%s', app pid = %d", + app->sock, app->name, (int) app->pid); ret = 0; goto error_accounting; default: @@ -4123,49 +4223,32 @@ error: return ret; } -/* - * Unregister app by removing it from the global traceable app list and freeing - * the data struct. - * - * The socket is already closed at this point so no close to sock. - */ -void ust_app_unregister(int sock) +static void ust_app_unregister(struct ust_app *app) { - struct ust_app *lta; - struct lttng_ht_node_ulong *node; - struct lttng_ht_iter ust_app_sock_iter; + int ret; struct lttng_ht_iter iter; struct ust_app_session *ua_sess; - int ret; rcu_read_lock(); - /* Get the node reference for a call_rcu */ - lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter); - node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter); - assert(node); - - lta = caa_container_of(node, struct ust_app, sock_n); - DBG("PID %d unregistering with sock %d", lta->pid, sock); - /* * For per-PID buffers, perform "push metadata" and flush all * application streams before removing app from hash tables, * ensuring proper behavior of data_pending check. * Remove sessions so they are not visible during deletion. */ - cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess, + cds_lfht_for_each_entry(app->sessions->ht, &iter.iter, ua_sess, node.node) { struct ust_registry_session *registry; - ret = lttng_ht_del(lta->sessions, &iter); + ret = lttng_ht_del(app->sessions, &iter); if (ret) { /* The session was already removed so scheduled for teardown. */ continue; } if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) { - (void) ust_app_flush_app_session(lta, ua_sess); + (void) ust_app_flush_app_session(app, ua_sess); } /* @@ -4206,41 +4289,63 @@ void ust_app_unregister(int sock) (void) close_metadata(registry, ua_sess->consumer); } } - cds_list_add(&ua_sess->teardown_node, <a->teardown_head); + cds_list_add(&ua_sess->teardown_node, &app->teardown_head); pthread_mutex_unlock(&ua_sess->lock); } - /* Remove application from PID hash table */ - ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter); - assert(!ret); - /* * Remove application from notify hash table. The thread handling the * notify socket could have deleted the node so ignore on error because * either way it's valid. The close of that socket is handled by the * apps_notify_thread. */ - iter.iter.node = <a->notify_sock_n.node; + iter.iter.node = &app->notify_sock_n.node; (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter); - /* - * Ignore return value since the node might have been removed before by an - * add replace during app registration because the PID can be reassigned by - * the OS. - */ - iter.iter.node = <a->pid_n.node; + iter.iter.node = &app->pid_n.node; ret = lttng_ht_del(ust_app_ht, &iter); if (ret) { - DBG3("Unregister app by PID %d failed. This can happen on pid reuse", - lta->pid); + WARN("Unregister app by PID %d failed", app->pid); } - /* Free memory */ - call_rcu(<a->pid_n.head, delete_ust_app_rcu); + rcu_read_unlock(); +} + +/* + * Unregister app by removing it from the global traceable app list and freeing + * the data struct. + * + * The socket is already closed at this point, so there is no need to close it. + */ +void ust_app_unregister_by_socket(int sock) +{ + struct ust_app *app; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter ust_app_sock_iter; + int ret; + + rcu_read_lock(); + + /* Get the node reference for a call_rcu */ + lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter); + node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter); + assert(node); + + app = caa_container_of(node, struct ust_app, sock_n); + DBG("PID %d unregistering with sock %d", app->pid, sock); + + /* Remove application from socket hash table */ + ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter); + assert(!ret); + + /* + * The socket is closed: release its reference to the application + * to trigger its eventual teardown. + */ + ust_app_put(app); rcu_read_unlock(); - return; } /* @@ -4298,11 +4403,6 @@ int ust_app_list_events(struct lttng_event **events) app->sock, ret); } else { DBG3("UST app tp list get failed. Application is dead"); - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. Continue normal execution. - */ break; } free(tmp_event); @@ -4357,8 +4457,17 @@ int ust_app_list_events(struct lttng_event **events) } ret = lttng_ust_ctl_release_handle(app->sock, handle); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) { - ERR("Error releasing app handle for app %d with ret %d", app->sock, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("Error releasing app handle. Application died: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("Error releasing app handle. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("Error releasing app handle with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); + } } } @@ -4429,11 +4538,6 @@ int ust_app_list_event_fields(struct lttng_event_field **fields) app->sock, ret); } else { DBG3("UST app tp list field failed. Application is dead"); - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. Reset list and count for next app. - */ break; } free(tmp_event); @@ -4537,25 +4641,18 @@ void ust_app_clean_list(void) * are unregistered prior to this clean-up. */ assert(lttng_ht_get_count(app->token_to_event_notifier_rule_ht) == 0); - ust_app_notify_sock_unregister(app->notify_sock); } } - if (ust_app_ht) { - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { - ret = lttng_ht_del(ust_app_ht, &iter); - assert(!ret); - call_rcu(&app->pid_n.head, delete_ust_app_rcu); - } - } - /* Cleanup socket hash table */ if (ust_app_ht_by_sock) { cds_lfht_for_each_entry(ust_app_ht_by_sock->ht, &iter.iter, app, sock_n.node) { ret = lttng_ht_del(ust_app_ht_by_sock, &iter); assert(!ret); + + ust_app_put(app); } } @@ -4738,9 +4835,11 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, - uevent->filter, uevent->attr.loglevel, - uevent->exclusion); + ua_event = find_ust_app_event(ua_chan->events, + uevent->attr.name, uevent->filter, + (enum lttng_ust_abi_loglevel_type) + uevent->attr.loglevel_type, + uevent->attr.loglevel, uevent->exclusion); if (ua_event == NULL) { DBG2("Event %s not found in channel %s for app pid %d." "Skipping", uevent->attr.name, uchan->name, app->pid); @@ -4898,8 +4997,11 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); /* Get event node */ - ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, - uevent->filter, uevent->attr.loglevel, uevent->exclusion); + ua_event = find_ust_app_event(ua_chan->events, + uevent->attr.name, uevent->filter, + (enum lttng_ust_abi_loglevel_type) + uevent->attr.loglevel_type, + uevent->attr.loglevel, uevent->exclusion); if (ua_event == NULL) { DBG3("UST app enable event %s not found for app PID %d." "Skipping app", uevent->attr.name, app->pid); @@ -5038,18 +5140,20 @@ skip_setup: ret = lttng_ust_ctl_start_session(app->sock, ua_sess->handle); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Error starting tracing for app pid: %d (ret: %d)", - app->pid, ret); - } else { - DBG("UST app start session failed. Application is dead."); - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app start session failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + pthread_mutex_unlock(&ua_sess->lock); + goto end; + } else if (ret == -EAGAIN) { + WARN("UST app start session failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); pthread_mutex_unlock(&ua_sess->lock); goto end; + + } else { + ERR("UST app start session failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } goto error_unlock; } @@ -5066,9 +5170,17 @@ skip_setup: pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_wait_quiescent(app->sock); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app wait quiescent failed for app pid %d ret %d", - app->pid, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app wait quiescent failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app wait quiescent failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + } else { + ERR("UST app wait quiescent failed with ret %d: pid %d, sock = %d", + ret, app->pid, app->sock); + } } end: @@ -5130,17 +5242,18 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) ret = lttng_ust_ctl_stop_session(app->sock, ua_sess->handle); pthread_mutex_unlock(&app->sock_lock); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("Error stopping tracing for app pid: %d (ret: %d)", - app->pid, ret); - } else { - DBG("UST app stop session failed. Application is dead."); - /* - * This is normal behavior, an application can die during the - * creation process. Don't report an error so the execution can - * continue normally. - */ + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app stop session failed. Application is dead: pid = %d, sock = %d", + app->pid, app->sock); goto end_unlock; + } else if (ret == -EAGAIN) { + WARN("UST app stop session failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); + goto end_unlock; + + } else { + ERR("UST app stop session failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } goto error_rcu_unlock; } @@ -5152,9 +5265,17 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_wait_quiescent(app->sock); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app wait quiescent failed for app pid %d ret %d", - app->pid, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)", + app->pid, app->sock); + } else { + ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)", + ret, app->pid, app->sock); + } } health_code_update(); @@ -5502,9 +5623,17 @@ static int destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) pthread_mutex_lock(&app->sock_lock); ret = lttng_ust_ctl_wait_quiescent(app->sock); pthread_mutex_unlock(&app->sock_lock); - if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app wait quiescent failed for app pid %d ret %d", - app->pid, ret); + if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app wait quiescent failed. Application is dead: pid= %d, sock = %d)", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app wait quiescent failed. Communication time out: pid= %d, sock = %d)", + app->pid, app->sock); + } else { + ERR("UST app wait quiescent failed with ret %d: pid= %d, sock = %d)", + ret, app->pid, app->sock); + } } end: rcu_read_unlock(); @@ -5646,7 +5775,10 @@ int ust_app_channel_synchronize_event(struct ust_app_channel *ua_chan, struct ust_app_event *ua_event = NULL; ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, - uevent->filter, uevent->attr.loglevel, uevent->exclusion); + uevent->filter, + (enum lttng_ust_abi_loglevel_type) + uevent->attr.loglevel_type, + uevent->attr.loglevel, uevent->exclusion); if (!ua_event) { ret = create_ust_app_event(ua_sess, ua_chan, uevent, app); if (ret < 0) { @@ -5895,14 +6027,16 @@ void ust_app_synchronize(struct ltt_ust_session *usess, ret = find_or_create_ust_app_session(usess, app, &ua_sess, NULL); if (ret < 0) { /* Tracer is probably gone or ENOMEM. */ - goto error; + if (ua_sess) { + destroy_app_session(app, ua_sess); + } + goto end; } assert(ua_sess); pthread_mutex_lock(&ua_sess->lock); if (ua_sess->deleted) { - pthread_mutex_unlock(&ua_sess->lock); - goto end; + goto deleted_session; } rcu_read_lock(); @@ -5920,23 +6054,15 @@ void ust_app_synchronize(struct ltt_ust_session *usess, */ ret = create_ust_app_metadata(ua_sess, app, usess->consumer); if (ret < 0) { - goto error_unlock; + ERR("Metadata creation failed for app sock %d for session id %" PRIu64, + app->sock, usess->id); } rcu_read_unlock(); -end: - pthread_mutex_unlock(&ua_sess->lock); - /* Everything went well at this point. */ - return; - -error_unlock: - rcu_read_unlock(); +deleted_session: pthread_mutex_unlock(&ua_sess->lock); -error: - if (ua_sess) { - destroy_app_session(app, ua_sess); - } +end: return; } @@ -5996,16 +6122,16 @@ void ust_app_global_update(struct ltt_ust_session *usess, struct ust_app *app) */ void ust_app_global_update_event_notifier_rules(struct ust_app *app) { - DBG2("UST application global event notifier rules update: app = '%s' (ppid: %d)", - app->name, app->ppid); + DBG2("UST application global event notifier rules update: app = '%s', pid = %d)", + app->name, app->pid); if (!app->compatible || !ust_app_supports_notifiers(app)) { return; } if (app->event_notifier_group.object == NULL) { - WARN("UST app global update of event notifiers for app skipped since communication handle is null: app = '%s' (ppid: %d)", - app->name, app->ppid); + WARN("UST app global update of event notifiers for app skipped since communication handle is null: app = '%s' pid = %d)", + app->name, app->pid); return; } @@ -6199,6 +6325,104 @@ error: return ua_chan; } +/* + * Fixup legacy context fields for comparison: + * - legacy array becomes array_nestable, + * - legacy struct becomes struct_nestable, + * - legacy variant becomes variant_nestable, + * legacy sequences are not emitted in LTTng-UST contexts. + */ +static int ust_app_fixup_legacy_context_fields(size_t *_nr_fields, + struct lttng_ust_ctl_field **_fields) +{ + struct lttng_ust_ctl_field *fields = *_fields, *new_fields = NULL; + size_t nr_fields = *_nr_fields, new_nr_fields = 0, i, j; + bool found = false; + int ret = 0; + + for (i = 0; i < nr_fields; i++) { + const struct lttng_ust_ctl_field *field = &fields[i]; + + switch (field->type.atype) { + case lttng_ust_ctl_atype_sequence: + ERR("Unexpected legacy sequence context."); + ret = -EINVAL; + goto end; + case lttng_ust_ctl_atype_array: + switch (field->type.u.legacy.array.elem_type.atype) { + case lttng_ust_ctl_atype_integer: + break; + default: + ERR("Unexpected legacy array element type in context."); + ret = -EINVAL; + goto end; + } + found = true; + /* One field for array_nested, one field for elem type. */ + new_nr_fields += 2; + break; + + case lttng_ust_ctl_atype_struct: /* Fallthrough */ + case lttng_ust_ctl_atype_variant: + found = true; + new_nr_fields++; + break; + default: + new_nr_fields++; + break; + } + } + if (!found) { + goto end; + } + new_fields = (struct lttng_ust_ctl_field *) zmalloc(sizeof(*new_fields) * new_nr_fields); + if (!new_fields) { + ret = -ENOMEM; + goto end; + } + for (i = 0, j = 0; i < nr_fields; i++, j++) { + const struct lttng_ust_ctl_field *field = &fields[i]; + struct lttng_ust_ctl_field *new_field = &new_fields[j]; + + switch (field->type.atype) { + case lttng_ust_ctl_atype_array: + /* One field for array_nested, one field for elem type. */ + strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1); + new_field->type.atype = lttng_ust_ctl_atype_array_nestable; + new_field->type.u.array_nestable.length = field->type.u.legacy.array.length; + new_field->type.u.array_nestable.alignment = 0; + new_field = &new_fields[++j]; /* elem type */ + new_field->type.atype = field->type.u.legacy.array.elem_type.atype; + assert(new_field->type.atype == lttng_ust_ctl_atype_integer); + new_field->type.u.integer = field->type.u.legacy.array.elem_type.u.basic.integer; + break; + case lttng_ust_ctl_atype_struct: + strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1); + new_field->type.atype = lttng_ust_ctl_atype_struct_nestable; + new_field->type.u.struct_nestable.nr_fields = field->type.u.legacy._struct.nr_fields; + new_field->type.u.struct_nestable.alignment = 0; + break; + case lttng_ust_ctl_atype_variant: + strncpy(new_field->name, field->name, LTTNG_UST_ABI_SYM_NAME_LEN - 1); + new_field->type.atype = lttng_ust_ctl_atype_variant_nestable; + new_field->type.u.variant_nestable.nr_choices = field->type.u.legacy.variant.nr_choices; + strncpy(new_field->type.u.variant_nestable.tag_name, + field->type.u.legacy.variant.tag_name, + LTTNG_UST_ABI_SYM_NAME_LEN - 1); + new_field->type.u.variant_nestable.alignment = 0; + break; + default: + *new_field = *field; + break; + } + } + free(fields); + *_fields = new_fields; + *_nr_fields = new_nr_fields; +end: + return ret; +} + /* * Reply to a register channel notification from an application on the notify * socket. The channel metadata is also created. @@ -6213,7 +6437,7 @@ static int reply_ust_register_channel(int sock, int cobjd, int ret, ret_code = 0; uint32_t chan_id; uint64_t chan_reg_key; - enum lttng_ust_ctl_channel_header type; + enum lttng_ust_ctl_channel_header type = LTTNG_UST_CTL_CHANNEL_HEADER_UNKNOWN; struct ust_app *app; struct ust_app_channel *ua_chan; struct ust_app_session *ua_sess; @@ -6227,7 +6451,7 @@ static int reply_ust_register_channel(int sock, int cobjd, if (!app) { DBG("Application socket %d is being torn down. Abort event notify", sock); - ret = 0; + ret = -1; goto error_rcu_unlock; } @@ -6262,6 +6486,16 @@ static int reply_ust_register_channel(int sock, int cobjd, ust_reg_chan = ust_registry_channel_find(registry, chan_reg_key); assert(ust_reg_chan); + /* Channel id is set during the object creation. */ + chan_id = ust_reg_chan->chan_id; + + ret = ust_app_fixup_legacy_context_fields(&nr_fields, &fields); + if (ret < 0) { + ERR("Registering application channel due to legacy context fields fixup error: pid = %d, sock = %d", + app->pid, app->sock); + ret_code = -EINVAL; + goto reply; + } if (!ust_reg_chan->register_done) { /* * TODO: eventually use the registry event count for @@ -6276,9 +6510,19 @@ static int reply_ust_register_channel(int sock, int cobjd, } else { /* Get current already assigned values. */ type = ust_reg_chan->header_type; + /* + * Validate that the context fields match between + * registry and newcoming application. + */ + if (!match_lttng_ust_ctl_field_array(ust_reg_chan->ctx_fields, + ust_reg_chan->nr_ctx_fields, + fields, nr_fields)) { + ERR("Registering application channel due to context field mismatch: pid = %d, sock = %d", + app->pid, app->sock); + ret_code = -EINVAL; + goto reply; + } } - /* Channel id is set during the object creation. */ - chan_id = ust_reg_chan->chan_id; /* Append to metadata */ if (!ust_reg_chan->metadata_dumped) { @@ -6291,15 +6535,20 @@ static int reply_ust_register_channel(int sock, int cobjd, reply: DBG3("UST app replying to register channel key %" PRIu64 - " with id %u, type: %d, ret: %d", chan_reg_key, chan_id, type, + " with id %u, type = %d, ret = %d", chan_reg_key, chan_id, type, ret_code); ret = lttng_ust_ctl_reply_register_channel(sock, chan_id, type, ret_code); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app reply channel failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app reply channel failed. Application died: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app reply channel failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); } else { - DBG3("UST app reply channel failed. Application died"); + ERR("UST app reply channel failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } goto error; } @@ -6343,7 +6592,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name, if (!app) { DBG("Application socket %d is being torn down. Abort event notify", sock); - ret = 0; + ret = -1; goto error_rcu_unlock; } @@ -6393,10 +6642,15 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name, */ ret = lttng_ust_ctl_reply_register_event(sock, event_id, ret_code); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app reply event failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app reply event failed. Application died: pid = %d, sock = %d.", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app reply event failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); } else { - DBG3("UST app reply event failed. Application died"); + ERR("UST app reply event failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } /* * No need to wipe the create event since the application socket will @@ -6444,6 +6698,7 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name, DBG("Application socket %d is being torn down. Aborting enum registration", sock); free(entries); + ret = -1; goto error_rcu_unlock; } @@ -6481,10 +6736,15 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name, */ ret = lttng_ust_ctl_reply_register_enum(sock, enum_id, ret_code); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app reply enum failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app reply enum failed. Application died: pid = %d, sock = %d", + app->pid, app->sock); + } else if (ret == -EAGAIN) { + WARN("UST app reply enum failed. Communication time out: pid = %d, sock = %d", + app->pid, app->sock); } else { - DBG3("UST app reply enum failed. Application died"); + ERR("UST app reply enum failed with ret %d: pid = %d, sock = %d", + ret, app->pid, app->sock); } /* * No need to wipe the create enum since the application socket will @@ -6516,10 +6776,15 @@ int ust_app_recv_notify(int sock) ret = lttng_ust_ctl_recv_notify(sock, &cmd); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app recv notify failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app recv notify failed. Application died: sock = %d", + sock); + } else if (ret == -EAGAIN) { + WARN("UST app recv notify failed. Communication time out: sock = %d", + sock); } else { - DBG3("UST app recv notify failed. Application died"); + ERR("UST app recv notify failed with ret %d: sock = %d", + ret, sock); } goto error; } @@ -6538,10 +6803,15 @@ int ust_app_recv_notify(int sock) &loglevel_value, &sig, &nr_fields, &fields, &model_emf_uri); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app recv event failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app recv event failed. Application died: sock = %d", + sock); + } else if (ret == -EAGAIN) { + WARN("UST app recv event failed. Communication time out: sock = %d", + sock); } else { - DBG3("UST app recv event failed. Application died"); + ERR("UST app recv event failed with ret %d: sock = %d", + ret, sock); } goto error; } @@ -6571,10 +6841,15 @@ int ust_app_recv_notify(int sock) ret = lttng_ust_ctl_recv_register_channel(sock, &sobjd, &cobjd, &nr_fields, &fields); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app recv channel failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app recv channel failed. Application died: sock = %d", + sock); + } else if (ret == -EAGAIN) { + WARN("UST app recv channel failed. Communication time out: sock = %d", + sock); } else { - DBG3("UST app recv channel failed. Application died"); + ERR("UST app recv channel failed with ret %d: sock = %d)", + ret, sock); } goto error; } @@ -6604,10 +6879,15 @@ int ust_app_recv_notify(int sock) ret = lttng_ust_ctl_recv_register_enum(sock, &sobjd, name, &entries, &nr_entries); if (ret < 0) { - if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { - ERR("UST app recv enum failed with ret %d", ret); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + DBG3("UST app recv enum failed. Application died: sock = %d", + sock); + } else if (ret == -EAGAIN) { + WARN("UST app recv enum failed. Communication time out: sock = %d", + sock); } else { - DBG3("UST app recv enum failed. Application died"); + ERR("UST app recv enum failed with ret %d: sock = %d", + ret, sock); } goto error; } @@ -6710,7 +6990,7 @@ close_socket: /* * Destroy a ust app data structure and free its memory. */ -void ust_app_destroy(struct ust_app *app) +static void ust_app_destroy(struct ust_app *app) { if (!app) { return; @@ -7137,7 +7417,7 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) int ret; enum lttng_error_code cmd_ret = LTTNG_OK; struct lttng_ht_iter iter; - struct ust_app *app; + struct ust_app *app = NULL; struct ltt_ust_session *usess = session->ust_session; assert(usess); @@ -7210,10 +7490,20 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) struct ust_app_channel *ua_chan; struct ust_app_session *ua_sess; struct ust_registry_session *registry; + bool app_reference_taken; + + app_reference_taken = ust_app_get(app); + if (!app_reference_taken) { + /* Application unregistered concurrently, skip it. */ + DBG("Could not get application reference as it is being torn down; skipping application"); + continue; + } ua_sess = lookup_session_by_app(usess, app); if (!ua_sess) { /* Session not associated with this app. */ + ust_app_put(app); + app = NULL; continue; } @@ -7225,11 +7515,9 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) goto error; } + registry = get_session_registry(ua_sess); - if (!registry) { - DBG("Application session is being torn down. Skip application."); - continue; - } + assert(registry); /* Rotate the data channels. */ cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter, @@ -7241,9 +7529,6 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) ua_sess->consumer, /* is_metadata_channel */ false); if (ret < 0) { - /* Per-PID buffer and application going away. */ - if (ret == -LTTNG_ERR_CHAN_NOT_FOUND) - continue; cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER; goto error; } @@ -7258,13 +7543,15 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) ua_sess->consumer, /* is_metadata_channel */ true); if (ret < 0) { - /* Per-PID buffer and application going away. */ - if (ret == -LTTNG_ERR_CHAN_NOT_FOUND) - continue; cmd_ret = LTTNG_ERR_ROTATION_FAIL_CONSUMER; goto error; } + + ust_app_put(app); + app = NULL; } + + app = NULL; break; } default: @@ -7275,6 +7562,7 @@ enum lttng_error_code ust_app_rotate_session(struct ltt_session *session) cmd_ret = LTTNG_OK; error: + ust_app_put(app); rcu_read_unlock(); return cmd_ret; } @@ -7656,3 +7944,26 @@ error: rcu_read_unlock(); return ret; } + +static void ust_app_release(struct urcu_ref *ref) +{ + struct ust_app *app = container_of(ref, struct ust_app, ref); + + ust_app_unregister(app); + ust_app_destroy(app); +} + +bool ust_app_get(struct ust_app *app) +{ + assert(app); + return urcu_ref_get_unless_zero(&app->ref); +} + +void ust_app_put(struct ust_app *app) +{ + if (!app) { + return; + } + + urcu_ref_put(&app->ref, ust_app_release); +}