ust-consumerd: fix exit race crashes
author Jason Wessel <jason.wessel@windriver.com>
Wed, 27 Apr 2011 20:22:14 +0000 (22:22 +0200)
committer Nils Carlson <nils.carlson@ericsson.com>
Thu, 28 Apr 2011 13:14:47 +0000 (15:14 +0200)
The ust-consumerd gets shut down by the SIGTERM signal, and a number of
places in the ust-consumerd did not properly deal with the case where
a system call fails with errno set to EINTR as a result of a signal
delivered to the process.  The failure to handle EINTR properly was
leading to some data corruption in the buffer code and causing some
random "victim" crashes in lowlevel.c.

All the offending functions were tracked down by temporarily adding an
abort() to the SIGTERM signal handler.  Then it was a matter of looking
at which threads, other than the thread that received the signal, were
blocked on system calls at the time.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Nils Carlson <nils.carlson@ericsson.com>
libustconsumer/libustconsumer.c
ust-consumerd/ust-consumerd.c

index c5acffa030296eda5b775fed81e13ec8b7134b7d..abf21d801ca1473fd8b1b3c1453636361fc0199a 100644 (file)
@@ -477,6 +477,8 @@ int consumer_loop(struct ustconsumer_instance *instance, struct buffer_info *buf
                        DBG("App died while being traced");
                        finish_consuming_dead_subbuffer(instance->callbacks, buf);
                        break;
+               } else if (read_result == -1 && errno == EINTR) {
+                       continue;
                }
 
                if(instance->callbacks->on_read_subbuffer)
@@ -783,8 +785,11 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
 
        struct sockaddr_un addr;
 
+socket_again:
        result = fd = socket(PF_UNIX, SOCK_STREAM, 0);
        if(result == -1) {
+               if (errno == EINTR)
+                       goto socket_again;
                PERROR("socket");
                return 1;
        }
@@ -794,13 +799,21 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
        strncpy(addr.sun_path, instance->sock_path, UNIX_PATH_MAX);
        addr.sun_path[UNIX_PATH_MAX-1] = '\0';
 
+connect_again:
        result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
        if(result == -1) {
+               if (errno == EINTR)
+                       goto connect_again;
                PERROR("connect");
        }
 
-       while(bytes != sizeof(msg))
-               bytes += send(fd, msg, sizeof(msg), 0);
+       while(bytes != sizeof(msg)) {
+               int inc = send(fd, msg, sizeof(msg), 0);
+               if (inc < 0 && errno != EINTR)
+                       break;
+               else
+                       bytes += inc;
+       }
 
        close(fd);
 
index ce2ee40a339a96b05e880c7355af1a2e63a2f2f6..c96139455d24ecdbb54014f9f829b42819ef2f5f 100644 (file)
@@ -210,7 +210,11 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
                    trace_path, buf->pid, buf->pidunique, buf->name);
                return 1;
        }
+again:
        result = fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 00600);
+       if (result == -1 && errno == EINTR)
+               goto again;
+
        if(result == -1) {
                PERROR("open");
                ERR("failed opening trace file %s", tmp);
@@ -225,7 +229,12 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
 int on_close_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
 {
        struct buffer_info_local *buf_local = buf->user_data;
-       int result = close(buf_local->file_fd);
+       int result;
+
+again:
+       result = close(buf_local->file_fd);
+       if (result == -1 && errno == EINTR)
+               goto again;
        free(buf_local);
        if(result == -1) {
                PERROR("close");
This page took 0.025786 seconds and 4 git commands to generate.