Fix: consumer metadata switch timer error handling
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Thu, 25 Apr 2013 18:45:17 +0000 (14:45 -0400)
committer David Goulet <dgoulet@efficios.com>
Fri, 26 Apr 2013 15:26:30 +0000 (11:26 -0400)
The thread handling the metadata switch cannot call
consumer_timer_switch_stop(), because it would get stuck waiting
for itself to execute the teardown signal handler.

Moreover, only one thread is allowed to call start/stop timer (by
design). Therefore, it is racy for the thread running the timer
handler to call "stop".

Fix this by adding a "switch_timer_error" flag, which just inhibits the
timer if an error is encountered, but does not modify its state
otherwise.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: David Goulet <dgoulet@efficios.com>
src/common/consumer-timer.c
src/common/consumer.h

index ef056d1188d69571f6131c89c34cf3d8f622d2a9..641478697904e516df1feb7e5c214b814fd38fb0 100644 (file)
@@ -61,17 +61,17 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx,
        channel = si->si_value.sival_ptr;
        assert(channel);
 
        channel = si->si_value.sival_ptr;
        assert(channel);
 
+       if (channel->switch_timer_error) {
+               return;
+       }
+
        DBG("Switch timer for channel %" PRIu64, channel->key);
        switch (ctx->type) {
        case LTTNG_CONSUMER32_UST:
        case LTTNG_CONSUMER64_UST:
                ret = lttng_ustconsumer_request_metadata(ctx, channel);
                if (ret < 0) {
        DBG("Switch timer for channel %" PRIu64, channel->key);
        switch (ctx->type) {
        case LTTNG_CONSUMER32_UST:
        case LTTNG_CONSUMER64_UST:
                ret = lttng_ustconsumer_request_metadata(ctx, channel);
                if (ret < 0) {
-                       /*
-                        * An error means that we were unable to request the metadata to
-                        * the session daemon so stop the timer for that channel.
-                        */
-                       consumer_timer_switch_stop(channel);
+                       channel->switch_timer_error = 1;
                }
                break;
        case LTTNG_CONSUMER_KERNEL:
                }
                break;
        case LTTNG_CONSUMER_KERNEL:
@@ -83,6 +83,8 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx,
 
 /*
  * Set the timer for periodical metadata flush.
 
 /*
  * Set the timer for periodical metadata flush.
+ * Should be called only from the recv cmd thread (single thread ensures
+ * mutual exclusion).
  */
 void consumer_timer_switch_start(struct lttng_consumer_channel *channel,
                unsigned int switch_timer_interval)
  */
 void consumer_timer_switch_start(struct lttng_consumer_channel *channel,
                unsigned int switch_timer_interval)
@@ -120,6 +122,8 @@ void consumer_timer_switch_start(struct lttng_consumer_channel *channel,
 
 /*
  * Stop and delete timer.
 
 /*
  * Stop and delete timer.
+ * Should be called only from the recv cmd thread (single thread ensures
+ * mutual exclusion).
  */
 void consumer_timer_switch_stop(struct lttng_consumer_channel *channel)
 {
  */
 void consumer_timer_switch_stop(struct lttng_consumer_channel *channel)
 {
index 19a590e801e9739ff2aca138c5bf9cfe62417e30..844310681db21ccddf86b9ad47dfa5cb249bef2d 100644 (file)
@@ -146,6 +146,8 @@ struct lttng_consumer_channel {
        /* For metadata periodical flush */
        int switch_timer_enabled;
        timer_t switch_timer;
        /* For metadata periodical flush */
        int switch_timer_enabled;
        timer_t switch_timer;
+       int switch_timer_error;
+
        /* On-disk circular buffer */
        uint64_t tracefile_size;
        uint64_t tracefile_count;
        /* On-disk circular buffer */
        uint64_t tracefile_size;
        uint64_t tracefile_count;
This page took 0.027406 seconds and 4 git commands to generate.