Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Assuming that finding the exact sub-pool usage maximum for any given
distribution is NP complete (not proven).
Assuming that finding the exact sub-pool usage maximum for any given
distribution is NP complete (not proven).
-Taking into account unbalance ratios (tested programmatically by
+Taking into account sub-class size unbalance (tested programmatically by
randomly taking N entries from 256, calculating the distribution for
each bit (number of nodes for which bit is one/zero), and calculating
the difference in number of nodes for each bit, choosing the minimum
difference -- for millions of runs).
randomly taking N entries from 256, calculating the distribution for
each bit (number of nodes for which bit is one/zero), and calculating
the difference in number of nodes for each bit, choosing the minimum
difference -- for millions of runs).
-tot entries unbalance largest linear array (stat. approx.)
+We start from the "ideal" population size (with no unbalance), and do a
+fixed point to find the appropriate population size.
+
+tot entries subclass extra items largest linear array (stat. approx.)
---------------------------------------------------------------------
---------------------------------------------------------------------
-48 entries: 2 (98%) 24+1=25 (target ~50/2=25)
-54 entries: 2 (97%) 27+1=28 (target ~56/2=28)
+48 entries: 1 (98%) 24+1=25 (target ~50/2=25)
+54 entries: 1 (97%) 27+1=28 (target ~56/2=28)
Note: there exists rare worse cases where the unbalance is larger, but
it happens _very_ rarely. But need to provide a fallback if the subclass
Note: there exists rare worse cases where the unbalance is larger, but
it happens _very_ rarely. But need to provide a fallback if the subclass
For pool of size 4, we need to approximate what is the maximum unbalance
we can get for choice of distributions grouped by pairs of bits.
For pool of size 4, we need to approximate what is the maximum unbalance
we can get for choice of distributions grouped by pairs of bits.
-tot entries unbalance largest linear array (stat. approx.)
+tot entries subclass extra items largest linear array (stat. approx.)
---------------------------------------------------------------------
---------------------------------------------------------------------
-92 entries: 8 (99%) 23+2=25 (target: ~100/4=25)
-104 entries: 8 (99%) 26+2=28 (target: ~112/4=28)
+92 entries: 2 (99%) 23+2=25 (target: ~100/4=25)
+104 entries: 2 (99%) 26+2=28 (target: ~112/4=28)
Note: there exists rare worse cases where the unbalance is larger, but
Note: there exists rare worse cases where the unbalance is larger, but
- * This program generates random populations, and shows the worse-case
- * unbalance, as well as the distribution of unbalance encountered.
- * Remember that the unbalance is the delta between the lowest and
- * largest population. Therefore, to get the delta between the subclass
- * size and the actual number of items, we need to divide the unbalance
- * by the number of subclasses (by hand).
+ * This program generates random populations, and shows the largest
+ * sub-class generated, as well as the distribution of sub-class size
+ * for the largest sub-class of each population.
static uint8_t nr_one[8];
static uint8_t nr_2d_11[8][8];
static uint8_t nr_2d_10[8][8];
static uint8_t nr_one[8];
static uint8_t nr_2d_11[8][8];
static uint8_t nr_2d_10[8][8];
-static int global_max_minunbalance = 0;
+static int global_max_minsubclass_len = 0;
-static unsigned int unbalance_distrib[256];
+static unsigned int subclass_len_distrib[256];
static
uint8_t random_char(void)
static
uint8_t random_char(void)
static
void stat_count(void)
{
static
void stat_count(void)
{
- int minunbalance = INT_MAX;
+ int minsubclass_len = INT_MAX;
if (nr_distrib == 2) {
int i;
if (nr_distrib == 2) {
int i;
diff = (int) nr_one[i] * 2 - sel_pool_len;
if (diff < 0)
diff = -diff;
diff = (int) nr_one[i] * 2 - sel_pool_len;
if (diff < 0)
diff = -diff;
- if (diff < minunbalance) {
- minunbalance = diff;
+ if ((diff >> 1) < minsubclass_len) {
+ minsubclass_len = diff >> 1;
/* Get max linear array size */
if (diff[1] > diff[0])
diff[0] = diff[1];
/* Get max linear array size */
if (diff[1] > diff[0])
diff[0] = diff[1];
- if (diff[0] < minunbalance) {
- minunbalance = diff[0];
+ if ((diff[0] >> 2) < minsubclass_len) {
+ minsubclass_len = diff[0] >> 2;
- if (minunbalance > global_max_minunbalance) {
- global_max_minunbalance = minunbalance;
+ if (minsubclass_len > global_max_minsubclass_len) {
+ global_max_minsubclass_len = minsubclass_len;
- unbalance_distrib[minunbalance]++;
+ subclass_len_distrib[minsubclass_len]++;
unsigned long long tot = 0;
for (i = 0; i < 256; i++) {
unsigned long long tot = 0;
for (i = 0; i < 256; i++) {
- tot += unbalance_distrib[i];
+ tot += subclass_len_distrib[i];
}
if (tot == 0)
return;
printf("Distribution:\n");
for (i = 0; i < 256; i++) {
printf("(%u, %u, %llu%%) ",
}
if (tot == 0)
return;
printf("Distribution:\n");
for (i = 0; i < 256; i++) {
printf("(%u, %u, %llu%%) ",
- i, unbalance_distrib[i],
- 100 * (unsigned long long) unbalance_distrib[i] / tot);
+ i, subclass_len_distrib[i],
+ 100 * (unsigned long long) subclass_len_distrib[i] / tot);
static
void print_stat(uint64_t i)
{
static
void print_stat(uint64_t i)
{
- printf("after %llu pools, global_max_minunbalance: %d\n",
- (unsigned long long) i, global_max_minunbalance);
+ printf("after %llu pools, global_max_minsubclass_len extra: %d\n",
+ (unsigned long long) i, global_max_minsubclass_len);