Perform trace factor reduction as a separate step
[lttv.git] / lttv / lttv / sync / sync_chain.c
index 210be504d614590f8e33a2ca85841f3268962f65..08cc3cb33b2b8f137e993b9eb7214fa548079091 100644 (file)
 /* This file is part of the Linux Trace Toolkit viewer
- * Copyright (C) 2009 Benjamin Poirier <benjamin.poirier@polymtl.ca>
+ * Copyright (C) 2009, 2010 Benjamin Poirier <benjamin.poirier@polymtl.ca>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License Version 2 as
- * published by the Free Software Foundation;
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 2.1 of the License, or (at
+ * your option) any later version.
  *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define _ISOC99_SOURCE
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
+#include <errno.h>
+#include <math.h>
 #include <stdlib.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include <lttv/module.h>
-#include <lttv/option.h>
+#include <string.h>
+#include <unistd.h>
 
 #include "sync_chain.h"
 
 
-#ifndef g_info
-#define g_info(format...) g_log (G_LOG_DOMAIN, G_LOG_LEVEL_INFO, format)
-#endif
-
-
-static void init();
-static void destroy();
-
-static void timeDiff(struct timeval* const end, const struct timeval* const start);
-static gint gcfCompareAnalysis(gconstpointer a, gconstpointer b);
-static gint gcfCompareProcessing(gconstpointer a, gconstpointer b);
-static void gfAppendAnalysisName(gpointer data, gpointer user_data);
-
-static gboolean optionSync;
-static gboolean optionSyncStats;
-static gboolean optionSyncNull;
-static char* optionSyncAnalysis;
-
 GQueue processingModules= G_QUEUE_INIT;
 GQueue matchingModules= G_QUEUE_INIT;
 GQueue analysisModules= G_QUEUE_INIT;
+GQueue moduleOptions= G_QUEUE_INIT;
+
+
+static void floydWarshall(AllFactors* const allFactors, double*** const
+       distances, unsigned int*** const predecessors);
+static void getFactors(AllFactors* const allFactors, unsigned int** const
+       predecessors, unsigned int* const references, const unsigned int traceNum,
+       Factors* const factors);
 
 
 /*
- * Module init function
+ * Call the statistics function of each module of a sync chain
  *
- * This function is declared to be the module initialization function. Event
- * modules are registered with a "constructor (102)" attribute except one in
- * each class (processing, matching, analysis) which is chosen to be the
- * default and which is registered with a "constructor (101)" attribute.
- * Constructors with no priority are called after constructors with
- * priorities. The result is that the list of event modules is known when this
- * function is executed.
+ * Args:
+ *   syncState:    Container for synchronization data
  */
-static void init()
+void printStats(SyncState* const syncState)
 {
-       GString* analysisModulesNames;
-
-       g_debug("\t\t\tXXXX sync init\n");
-
-       optionSync= FALSE;
-       lttv_option_add("sync", '\0', "synchronize the time between the traces" ,
-               "none", LTTV_OPT_NONE, &optionSync, NULL, NULL);
-
-       optionSyncStats= FALSE;
-       lttv_option_add("sync-stats", '\0', "print statistics about the time "
-               "synchronization", "none", LTTV_OPT_NONE, &optionSyncStats, NULL,
-               NULL);
-
-       optionSyncNull= FALSE;
-       lttv_option_add("sync-null", '\0', "read the events but do not perform "
-               "any processing", "none", LTTV_OPT_NONE, &optionSyncNull, NULL, NULL);
-
-       g_assert(g_queue_get_length(&analysisModules) > 0);
-       optionSyncAnalysis= ((AnalysisModule*)
-               g_queue_peek_head(&analysisModules))->name;
-       analysisModulesNames= g_string_new("");
-       g_queue_foreach(&analysisModules, &gfAppendAnalysisName,
-               analysisModulesNames);
-       // remove the last ", "
-       g_string_truncate(analysisModulesNames, analysisModulesNames->len - 2);
-       lttv_option_add("sync-analysis", '\0', "specify the algorithm to use for "
-               "event analysis" , analysisModulesNames->str, LTTV_OPT_STRING,
-               &optionSyncAnalysis, NULL, NULL);
-       g_string_free(analysisModulesNames, TRUE);
+       if (syncState->processingModule->printProcessingStats != NULL)
+       {
+               syncState->processingModule->printProcessingStats(syncState);
+       }
+       if (syncState->matchingModule->printMatchingStats != NULL)
+       {
+               syncState->matchingModule->printMatchingStats(syncState);
+       }
+       if (syncState->analysisModule->printAnalysisStats != NULL)
+       {
+               syncState->analysisModule->printAnalysisStats(syncState);
+       }
 }
 
 
 /*
- * Module unload function
+ * Calculate the elapsed time between two timeval values
+ *
+ * Args:
+ *   end:          end time, result is also stored in this structure
+ *   start:        start time
  */
-static void destroy()
+void timeDiff(struct timeval* const end, const struct timeval* const start)
 {
-       g_debug("\t\t\tXXXX sync destroy\n");
-
-       lttv_option_remove("sync");
-       lttv_option_remove("sync-stats");
-       lttv_option_remove("sync-null");
-       lttv_option_remove("sync-analysis");
+               if (end->tv_usec >= start->tv_usec)
+               {
+                       end->tv_sec-= start->tv_sec;
+                       end->tv_usec-= start->tv_usec;
+               }
+               else
+               {
+                       end->tv_sec= end->tv_sec - start->tv_sec - 1;
+                       end->tv_usec= end->tv_usec - start->tv_usec + 1e6;
+               }
 }
 
 
 /*
- * Calculate a traceset's drift and offset values based on network events
+ * Calculate a resulting offset and drift for each trace.
+ *
+ * Traces are assembled in groups. A group is an "island" of nodes/traces that
+ * exchanged messages. A reference is determined for each group by using a
+ * shortest path search based on the accuracy of the approximation. This also
+ * forms a tree of the best way to relate each node's clock to the reference's
+ * based on the accuracy. Sometimes it may be necessary or advantageous to
+ * propagate the factors through intermediary clocks. Resulting factors for
+ * each trace are determined based on this tree.
+ *
+ * This part was not the focus of my research. The algorithm used here is
+ * inexact in some ways:
+ * 1) The reference used may not actually be the best one to use. This is
+ *    because the accuracy is not corrected based on the drift during the
+ *    shortest path search.
+ * 2) The min and max factors are not propagated and are no longer valid.
+ * 3) Approximations of different types (ACCURATE and APPROXIMATE) are compared
+ *    together. The "accuracy" parameters of these have different meanings and
+ *    are not readily comparable.
+ *
+ * Nevertheless, the result is satisfactory. You just can't tell "how much" it
+ * is.
  *
- * The individual correction factors are written out to each trace.
+ * Two alternative (and subtly different) ways of propagating factors to
+ * preserve min and max boundaries have been proposed, see:
+ * [Duda, A., Harrus, G., Haddad, Y., and Bernard, G.: Estimating global time
+ * in distributed systems, Proc. 7th Int. Conf. on Distributed Computing
+ * Systems, Berlin, volume 18, 1987] p.304
+ *
+ * [Jezequel, J.M., and Jard, C.: Building a global clock for observing
+ * computations in distributed memory parallel computers, Concurrency:
+ * Practice and Experience 8(1), volume 8, John Wiley & Sons, Ltd Chichester,
+ * 1996, 32] Section 5; which is mostly the same as
+ * [Jezequel, J.M.: Building a global time on parallel machines, Proceedings
+ * of the 3rd International Workshop on Distributed Algorithms, LNCS, volume
+ * 392, 136–147, 1989] Section 5
  *
  * Args:
- *   traceSetContext: traceset
+ *   allFactors:   offset and drift between each pair of traces
+ *
+ * Returns:
+ *   Factors[traceNb] synchronization factors for each trace
  */
-void syncTraceset(LttvTracesetContext* const traceSetContext)
+GArray* reduceFactors(AllFactors* const allFactors)
 {
-       SyncState* syncState;
-       struct timeval startTime, endTime;
-       struct rusage startUsage, endUsage;
-       GList* result;
-       int retval;
-
-       if (optionSync == FALSE)
+       GArray* factors;
+       double** distances;
+       unsigned int** predecessors;
+       double* distanceSums;
+       unsigned int* references;
+       unsigned int i, j;
+       const unsigned int traceNb= allFactors->traceNb;
+
+       // Solve the all-pairs shortest path problem using the Floyd-Warshall
+       // algorithm
+       floydWarshall(allFactors, &distances, &predecessors);
+
+       /* Find the reference for each node
+        *
+        * First calculate, for each node, the sum of the distances to each other
+        * node it can reach.
+        *
+        * Then, go through each "island" of traces to find the trace that has the
+        * lowest distance sum. Assign this trace as the reference to each trace
+        * of the island.
+        */
+       distanceSums= malloc(traceNb * sizeof(double));
+       for (i= 0; i < traceNb; i++)
        {
-               g_debug("Not synchronizing traceset because option is disabled");
-               return;
-       }
-
-       if (optionSyncStats)
-       {
-               gettimeofday(&startTime, 0);
-               getrusage(RUSAGE_SELF, &startUsage);
+               distanceSums[i]= 0.;
+               for (j= 0; j < traceNb; j++)
+               {
+                       distanceSums[i]+= distances[i][j];
+               }
        }
 
-       // Initialize data structures
-       syncState= malloc(sizeof(SyncState));
-       syncState->traceNb= lttv_traceset_number(traceSetContext->ts);
-
-       if (optionSyncStats)
+       references= malloc(traceNb * sizeof(unsigned int));
+       for (i= 0; i < traceNb; i++)
        {
-               syncState->stats= true;
+               references[i]= UINT_MAX;
        }
-       else
+       for (i= 0; i < traceNb; i++)
        {
-               syncState->stats= false;
+               if (references[i] == UINT_MAX)
+               {
+                       unsigned int reference;
+                       double distanceSumMin;
+
+                       // A node is its own reference by default
+                       reference= i;
+                       distanceSumMin= INFINITY;
+                       for (j= 0; j < traceNb; j++)
+                       {
+                               if (distances[i][j] != INFINITY && distanceSums[j] <
+                                       distanceSumMin)
+                               {
+                                       reference= j;
+                                       distanceSumMin= distanceSums[j];
+                               }
+                       }
+                       for (j= 0; j < traceNb; j++)
+                       {
+                               if (distances[i][j] != INFINITY)
+                               {
+                                       references[j]= reference;
+                               }
+                       }
+               }
        }
 
-       syncState->processingData= NULL;
-       if (optionSyncNull)
+       for (i= 0; i < traceNb; i++)
        {
-               result= g_queue_find_custom(&processingModules, "LTTV-null",
-                       &gcfCompareProcessing);
+               free(distances[i]);
        }
-       else
+       free(distances);
+       free(distanceSums);
+
+       /* For each trace, calculate the factors based on their corresponding
+        * tree. The tree is rooted at the reference and the shortest path to each
+        * other nodes are the branches.
+        */
+       factors= g_array_sized_new(FALSE, FALSE, sizeof(Factors),
+               traceNb);
+       g_array_set_size(factors, traceNb);
+       for (i= 0; i < traceNb; i++)
        {
-               result= g_queue_find_custom(&processingModules, "LTTV-standard",
-                       &gcfCompareProcessing);
+               getFactors(allFactors, predecessors, references, i, &g_array_index(factors,
+                               Factors, i));
        }
-       g_assert(result != NULL);
-       syncState->processingModule= (ProcessingModule*) result->data;
-       syncState->processingModule->initProcessing(syncState, traceSetContext);
 
-       syncState->matchingData= NULL;
-       syncState->analysisData= NULL;
-       if (optionSyncNull)
+       for (i= 0; i < traceNb; i++)
        {
-               syncState->matchingModule= NULL;
-               syncState->analysisModule= NULL;
+               free(predecessors[i]);
        }
-       else
+       free(predecessors);
+       free(references);
+
+       return factors;
+}
+
+
+/*
+ * Perform an all-source shortest path search using the Floyd-Warshall
+ * algorithm.
+ *
+ * The algorithm is implemented accoding to the description here:
+ * http://web.mit.edu/urban_or_book/www/book/chapter6/6.2.2.html
+ *
+ * Args:
+ *   allFactors:   offset and drift between each pair of traces
+ *   distances:    resulting matrix of the length of the shortest path between
+ *                 two nodes. If there is no path between two nodes, the
+ *                 length is INFINITY
+ *   predecessors: resulting matrix of each node's predecessor on the shortest
+ *                 path between two nodes
+ */
+static void floydWarshall(AllFactors* const allFactors, double*** const
+       distances, unsigned int*** const predecessors)
+{
+       unsigned int i, j, k;
+       const unsigned int traceNb= allFactors->traceNb;
+       PairFactors** const pairFactors= allFactors->pairFactors;
+
+       // Setup initial conditions
+       *distances= malloc(traceNb * sizeof(double*));
+       *predecessors= malloc(traceNb * sizeof(unsigned int*));
+       for (i= 0; i < traceNb; i++)
        {
-               g_assert(g_queue_get_length(&matchingModules) == 1);
-               syncState->matchingModule= (MatchingModule*)
-                       g_queue_peek_head(&matchingModules);
-               syncState->matchingModule->initMatching(syncState);
-
-               result= g_queue_find_custom(&analysisModules, optionSyncAnalysis,
-                       &gcfCompareAnalysis);
-               if (result != NULL)
+               (*distances)[i]= malloc(traceNb * sizeof(double));
+               for (j= 0; j < traceNb; j++)
                {
-                       syncState->analysisModule= (AnalysisModule*) result->data;
-                       syncState->analysisModule->initAnalysis(syncState);
+                       if (i == j)
+                       {
+                               g_assert(pairFactors[i][j].type == EXACT);
+
+                               (*distances)[i][j]= 0.;
+                       }
+                       else
+                       {
+                               if (pairFactors[i][j].type == ACCURATE ||
+                                       pairFactors[i][j].type == APPROXIMATE)
+                               {
+                                       (*distances)[i][j]= pairFactors[i][j].accuracy;
+                               }
+                               else if (pairFactors[j][i].type == ACCURATE ||
+                                       pairFactors[j][i].type == APPROXIMATE)
+                               {
+                                       (*distances)[i][j]= pairFactors[j][i].accuracy;
+                               }
+                               else
+                               {
+                                       (*distances)[i][j]= INFINITY;
+                               }
+                       }
                }
-               else
+
+               (*predecessors)[i]= malloc(traceNb * sizeof(unsigned int));
+               for (j= 0; j < traceNb; j++)
                {
-                       g_error("Analysis module '%s' not found", optionSyncAnalysis);
+                       if (i != j)
+                       {
+                               (*predecessors)[i][j]= i;
+                       }
+                       else
+                       {
+                               (*predecessors)[i][j]= UINT_MAX;
+                       }
                }
        }
 
-       // Process traceset
-       lttv_process_traceset_seek_time(traceSetContext, ltt_time_zero);
-       lttv_process_traceset_middle(traceSetContext, ltt_time_infinite,
-               G_MAXULONG, NULL);
-       lttv_process_traceset_seek_time(traceSetContext, ltt_time_zero);
-
-       syncState->processingModule->finalizeProcessing(syncState);
-
-       if (syncState->processingModule->printProcessingStats != NULL)
+       // Run the iterations
+       for (k= 0; k < traceNb; k++)
        {
-               syncState->processingModule->printProcessingStats(syncState);
-       }
+               for (i= 0; i < traceNb; i++)
+               {
+                       for (j= 0; j < traceNb; j++)
+                       {
+                               double distanceMin;
 
-       syncState->processingModule->destroyProcessing(syncState);
-       if (syncState->matchingModule != NULL)
-       {
-               syncState->matchingModule->destroyMatching(syncState);
-       }
-       if (syncState->analysisModule != NULL)
-       {
-               syncState->analysisModule->destroyAnalysis(syncState);
-       }
+                               distanceMin= MIN((*distances)[i][j], (*distances)[i][k] +
+                                       (*distances)[k][j]);
 
-       free(syncState);
+                               if (distanceMin != (*distances)[i][j])
+                               {
+                                       (*predecessors)[i][j]= (*predecessors)[k][j];
+                               }
 
-       if (optionSyncStats)
-       {
-               gettimeofday(&endTime, 0);
-               retval= getrusage(RUSAGE_SELF, &endUsage);
-
-               timeDiff(&endTime, &startTime);
-               timeDiff(&endUsage.ru_utime, &startUsage.ru_utime);
-               timeDiff(&endUsage.ru_stime, &startUsage.ru_stime);
-
-               printf("Synchronization time:\n");
-               printf("\treal time: %ld.%06ld\n", endTime.tv_sec, endTime.tv_usec);
-               printf("\tuser time: %ld.%06ld\n", endUsage.ru_utime.tv_sec,
-                       endUsage.ru_utime.tv_usec);
-               printf("\tsystem time: %ld.%06ld\n", endUsage.ru_stime.tv_sec,
-                       endUsage.ru_stime.tv_usec);
+                               (*distances)[i][j]= distanceMin;
+                       }
+               }
        }
 }
 
 
 /*
- * Calculate the elapsed time between two timeval values
+ * Cummulate the time correction factors to convert a node's time to its
+ * reference's time.
+ * This function recursively calls itself until it reaches the reference node.
  *
  * Args:
- *   end:          end time, result is also stored in this structure
- *   start:        start time
+ *   allFactors:   offset and drift between each pair of traces
+ *   predecessors: matrix of each node's predecessor on the shortest
+ *                 path between two nodes
+ *   references:   reference node for each node
+ *   traceNum:     node for which to find the factors
+ *   factors:      resulting factors
  */
-static void timeDiff(struct timeval* const end, const struct timeval* const start)
+static void getFactors(AllFactors* const allFactors, unsigned int** const
+       predecessors, unsigned int* const references, const unsigned int traceNum,
+       Factors* const factors)
 {
-               if (end->tv_usec >= start->tv_usec)
+       unsigned int reference;
+       PairFactors** const pairFactors= allFactors->pairFactors;
+
+       reference= references[traceNum];
+
+       if (reference == traceNum)
+       {
+               factors->offset= 0.;
+               factors->drift= 1.;
+       }
+       else
+       {
+               Factors previousVertexFactors;
+
+               getFactors(allFactors, predecessors, references,
+                       predecessors[reference][traceNum], &previousVertexFactors);
+
+               /* Convert the time from traceNum to reference;
+                * pairFactors[row][col] converts the time from col to row, invert the
+                * factors as necessary */
+
+               if (pairFactors[reference][traceNum].approx != NULL)
                {
-                       end->tv_sec-= start->tv_sec;
-                       end->tv_usec-= start->tv_usec;
+                       factors->offset= previousVertexFactors.drift *
+                               pairFactors[reference][traceNum].approx->offset +
+                               previousVertexFactors.offset;
+                       factors->drift= previousVertexFactors.drift *
+                               pairFactors[reference][traceNum].approx->drift;
+               }
+               else if (pairFactors[traceNum][reference].approx != NULL)
+               {
+                       factors->offset= previousVertexFactors.drift * (-1. *
+                               pairFactors[traceNum][reference].approx->offset /
+                               pairFactors[traceNum][reference].approx->drift) +
+                               previousVertexFactors.offset;
+                       factors->drift= previousVertexFactors.drift * (1. /
+                               pairFactors[traceNum][reference].approx->drift);
                }
                else
                {
-                       end->tv_sec= end->tv_sec - start->tv_sec - 1;
-                       end->tv_usec= end->tv_usec - start->tv_usec + 1e6;
+                       g_assert_not_reached();
                }
+       }
 }
 
 
@@ -265,21 +390,44 @@ static void timeDiff(struct timeval* const end, const struct timeval* const star
  * A GCompareFunc for g_slist_find_custom()
  *
  * Args:
- *   a:            AnalysisModule*, element's data
+ *   a:            ProcessingModule*, element's data
  *   b:            char*, user data to compare against
  *
  * Returns:
- *   0 if the analysis module a's name is b
+ *   0 if the processing module a's name is b
  */
-static gint gcfCompareAnalysis(gconstpointer a, gconstpointer b)
+gint gcfCompareProcessing(gconstpointer a, gconstpointer b)
 {
-       const AnalysisModule* analysisModule;
+       const ProcessingModule* processingModule;
        const char* name;
 
-       analysisModule= (const AnalysisModule*)a;
-       name= (const char*)b;
+       processingModule= (const ProcessingModule*) a;
+       name= (const char*) b;
 
-       return strncmp(analysisModule->name, name, strlen(analysisModule->name) +
+       return strncmp(processingModule->name, name,
+               strlen(processingModule->name) + 1);
+}
+
+
+/*
+ * A GCompareFunc for g_slist_find_custom()
+ *
+ * Args:
+ *   a:            MatchingModule*, element's data
+ *   b:            char*, user data to compare against
+ *
+ * Returns:
+ *   0 if the matching module a's name is b
+ */
+gint gcfCompareMatching(gconstpointer a, gconstpointer b)
+{
+       const MatchingModule* matchingModule;
+       const char* name;
+
+       matchingModule= (const MatchingModule*) a;
+       name= (const char*) b;
+
+       return strncmp(matchingModule->name, name, strlen(matchingModule->name) +
                1);
 }
 
@@ -288,22 +436,22 @@ static gint gcfCompareAnalysis(gconstpointer a, gconstpointer b)
  * A GCompareFunc for g_slist_find_custom()
  *
  * Args:
- *   a:            ProcessingModule*, element's data
+ *   a:            AnalysisModule*, element's data
  *   b:            char*, user data to compare against
  *
  * Returns:
  *   0 if the analysis module a's name is b
  */
-static gint gcfCompareProcessing(gconstpointer a, gconstpointer b)
+gint gcfCompareAnalysis(gconstpointer a, gconstpointer b)
 {
-       const ProcessingModule* processingModule;
+       const AnalysisModule* analysisModule;
        const char* name;
 
-       processingModule= (const ProcessingModule*)a;
-       name= (const char*)b;
+       analysisModule= (const AnalysisModule*) a;
+       name= (const char*) b;
 
-       return strncmp(processingModule->name, name,
-               strlen(processingModule->name) + 1);
+       return strncmp(analysisModule->name, name, strlen(analysisModule->name) +
+               1);
 }
 
 
@@ -316,14 +464,8 @@ static gint gcfCompareProcessing(gconstpointer a, gconstpointer b)
  *   data:         AnalysisModule*
  *   user_data:    GString*, concatenated names
  */
-static void gfAppendAnalysisName(gpointer data, gpointer user_data)
+void gfAppendAnalysisName(gpointer data, gpointer user_data)
 {
        g_string_append((GString*) user_data, ((AnalysisModule*) data)->name);
        g_string_append((GString*) user_data, ", ");
 }
-
-
-LTTV_MODULE("sync", "Synchronize traces", \
-       "Synchronizes a traceset based on the correspondance of network events", \
-       init, destroy, "option")
-
This page took 0.030181 seconds and 4 git commands to generate.