什么是找到平均大小分布而不必声明一个巨大的2D数组的有效方法?

考虑L×L大小矩阵M,其条目可以是0或1.每个元素是1,概率为p,0是概率1-p。 我将标记为1的元素称为黑色元素,将标记为0的元素称为白色元素。 我正在尝试编写一个代码:

  • 生成随机矩阵,条目为0和1。 我需要输入矩阵L的大小和概率p。

  • 标记属于同一群集的所有黑色元素具有相同的编号。 ( 将一个黑色元素簇定义为单元格图形中的最大连通分量,其值为1,其中边连接的行和列之间的差异最多为1(因此每个单元最多有8个邻居)。如果矩阵的两个黑色元素共享边或顶点,则认为它们属于同一个黑色簇。即,将矩阵视为大方块,将元素视为大方块中的小方块。

在此处输入图像描述

  • 在从p = 0到p = 100的循环内(我将概率p称为百分比):将每个大小的黑色簇的数量写入对应于该p值的文件。

例如:

输入 :p = 30,L = 50

输出 (写入每个p的数据文件;因此该程序创建了101个数据文件,从p = 0到p = 100):

1 100(即有100个大小为1的黑色簇)

2 20(即有20个黑色簇,大小为2)

3 15(即有15个黑色簇,大小为3)

等等…

#include "stdafx.h" #include  #include  #include  #include  #include  #include  int *labels; int n_labels = 0; int uf_find(int x) { int y = x; while (labels[y] != y) y = labels[y]; while (labels[x] != x) { int z = labels[x]; labels[x] = y; x = z; } return y; } int uf_union(int x, int y) { return labels[uf_find(x)] = uf_find(y); } int uf_make_set(void) { labels[0] ++; assert(labels[0] b?a:b) #define min(a,b) (a>b?b:a) int hoshen_kopelman(int **matrix, int m, int n) { uf_initialize(m * n / 2); for (int i = 0; i<m; i++) for (int j = 0; j<n; j++) if (matrix[i][j]) { int up = (i == 0 ? 0 : matrix[i - 1][j]); int left = (j == 0 ? 0 : matrix[i][j - 1]); switch (!!up + !!left) { case 0: matrix[i][j] = uf_make_set(); break; case 1: matrix[i][j] = max(up, left); break; case 2: matrix[i][j] = uf_union(up, left); break; } int north_west; if (i == 0 || j == 0) north_west = 0; else north_west = matrix[i - 1][j - 1]; int north_east; if (i == 0 || j == (n - 1)) north_east = 0; else north_east = matrix[i - 1][j + 1]; if (!!north_west == 1) { if (i != 0 && j != 0) { //if (rand() % 2 == 1) //{ if (matrix[i][j - 1] == 0 && matrix[i - 1][j] == 0) { if (!!matrix[i][j] == 0) matrix[i][j] = north_west; else uf_union(north_west, matrix[i][j]); } //} } else if (i == 0 || j == 0) { } } if (!!north_east == 1) { if (i != 0 && j != n - 1) { //if (rand() % 2 == 1) //{ if (matrix[i - 1][j] == 0 && matrix[i][j + 1] == 0) { if (!!matrix[i][j] == 0) matrix[i][j] = north_east; else uf_union(north_east, matrix[i][j]); } //} } else if (i == 0 || j == n - 1) { } } } int *new_labels = (int*)calloc(sizeof(int), n_labels); for (int i = 0; i<m; i++) for (int j = 0; j<n; j++) if (matrix[i][j]) { int x = uf_find(matrix[i][j]); if (new_labels[x] == 0) { new_labels[0]++; new_labels[x] = new_labels[0]; } matrix[i][j] = new_labels[x]; } int total_clusters = new_labels[0]; free(new_labels); uf_done(); return total_clusters; } void check_labelling(int **matrix, int m, int n) { int N, S, E, W; for (int i = 0; i<m; i++) for (int j = 0; j<n; j++) if (matrix[i][j]) { N = (i == 0 ? 0 : matrix[i - 1][j]); S = (i == m - 1 ? 0 : matrix[i + 1][j]); E = (j == n - 1 ? 0 : matrix[i][j + 1]); W = (j == 0 ? 0 : matrix[i][j - 1]); assert(N == 0 || matrix[i][j] == N); assert(S == 0 || matrix[i][j] == S); assert(E == 0 || matrix[i][j] == E); assert(W == 0 || matrix[i][j] == W); } } int cluster_count(int **matrix, int size, int N) { int i; int j; int count = 0; for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { if (matrix[i][j] == N) count++; } } return count; } int main(int argc, char **argv) { srand((unsigned int)time(0)); int p = 0; printf("Enter number of rows/columns: "); int size = 0; scanf("%d", &size); printf("\n"); FILE *fp; printf("Enter number of averaging iterations: "); int iterations = 0; scanf("%d", &iterations); for (int p = 0; p <= 100; p++) { char str[100]; sprintf(str, "BlackSizeDistribution%03i.txt", p); fp = fopen(str, "w"); int **matrix; matrix = (int**)calloc(10, sizeof(int*)); int** matrix_new = (int**)calloc(10, sizeof(int*)); matrix_new = (int **)realloc(matrix, sizeof(int*) * size); matrix = matrix_new; for (int i = 0; i < size; i++) { matrix[i] = (int *)calloc(size, sizeof(int)); for (int j = 0; j < size; j++) { int z = rand() % 100; z = z + 1; if (p == 0) matrix[i][j] = 0; if (z <= p) matrix[i][j] = 1; else matrix[i][j] = 0; } } hoshen_kopelman(matrix, size, size); int highest = matrix[0][0]; for (int i = 0; i < size; i++) for (int j = 0; j < size; j++) if (highest < matrix[i][j]) highest = matrix[i][j]; int* counter = (int*)calloc(sizeof(int*), highest + 1); int high = 0; for (int k = 1; k  high) high = counter[k]; } int* size_distribution = (int*)calloc(sizeof(int*), high + 1); for (int y = 1; y <= high; y++) { int count = 0; for (int z = 1; z <= highest; z++) if (counter[z] == y) count++; size_distribution[y] = count; } for (int k = 1; k <= high; k++) { fprintf(fp, "\n%d\t%d", k, size_distribution[k]); printf("\n%d\t%d", k, size_distribution[k]); } printf("\n"); check_labelling(matrix, size, size); for (int i = 0; i < size; i++) free(matrix[i]); free(matrix); } } 

我使用该程序输出的数据文件,使用Gnuplot为每个p的范围从0到100创建条形图。

一些示例图:

在此处输入图像描述

这一个对应于输入p = 3和L = 100

在此处输入图像描述

这一个对应于p = 90和L = 100


好吧,所以我想这对于我所遇到的实际问题来说已经足够了。

我写的程序只输出每p值1次迭代的值。 由于该程序用于科学计算目的,我需要更准确的值,因此我需要输出“平均”大小分布值; 超过50或100次迭代。 我不确定如何进行平均。

更清楚这是我想要的:

假设程序的三个不同运行的输出给了我(比如说p = 3,L = 100)

 Size Iteration 1 Iteration 2 Iteration 3 Averaged Value (Over 3 iterations) 1 150 170 180 167 2 10 20 15 18 3 1 2 1 1 4 0 0 1 0 . . . 

我想做类似的事情,除了我需要执行50或100次迭代的平均以获得我的数学模型的准确值。 顺便说一句,我真的不需要输出每次迭代的值,即迭代1,迭代2,迭代3,…在数据文件中。 我需要“打印”的是上表中的第一列和最后一列(具有平均值)。

现在,我可能需要修改其主函数来进行平均。

这是主要function:

 int main(int argc, char **argv) { srand((unsigned int)time(0)); int p = 0; printf("Enter number of rows/columns: "); int size = 0; scanf("%d", &size); printf("\n"); FILE *fp; printf("Enter number of averaging iterations: "); int iterations = 0; scanf("%d", &iterations); for (int p = 0; p <= 100; p++) { char str[100]; sprintf(str, "BlackSizeDistribution%03i.txt", p); fp = fopen(str, "w"); int **matrix; matrix = (int**)calloc(10, sizeof(int*)); int** matrix_new = (int**)calloc(10, sizeof(int*)); matrix_new = (int **)realloc(matrix, sizeof(int*) * size); matrix = matrix_new; for (int i = 0; i < size; i++) { matrix[i] = (int *)calloc(size, sizeof(int)); for (int j = 0; j < size; j++) { int z = rand() % 100; z = z + 1; if (p == 0) matrix[i][j] = 0; if (z <= p) matrix[i][j] = 1; else matrix[i][j] = 0; } } hoshen_kopelman(matrix, size, size); int highest = matrix[0][0]; for (int i = 0; i < size; i++) for (int j = 0; j < size; j++) if (highest < matrix[i][j]) highest = matrix[i][j]; int* counter = (int*)calloc(sizeof(int*), highest + 1); int high = 0; for (int k = 1; k  high) high = counter[k]; } int* size_distribution = (int*)calloc(sizeof(int*), high + 1); for (int y = 1; y <= high; y++) { int count = 0; for (int z = 1; z <= highest; z++) if (counter[z] == y) count++; size_distribution[y] = count; } for (int k = 1; k <= high; k++) { fprintf(fp, "\n%d\t%d", k, size_distribution[k]); printf("\n%d\t%d", k, size_distribution[k]); } printf("\n"); check_labelling(matrix, size, size); for (int i = 0; i < size; i++) free(matrix[i]); free(matrix); } } 

我想到的方法之一是声明一个2D数组的大小(largest size of black cluster possible for that) x (number of averaging iterations I want) ,p循环的每次运行,以及程序结束时从2Darrays中提取每个p的平均大小分布。 在尺寸为100 x 100(比如)的矩阵中,最大尺寸的黑色簇可能是10,000。 但是,对于较小的p值(比如p = 1,p = 20,……),这样的大尺寸星团甚至都不会产生! 因此,在开始时创建如此大的2Darrays将是一个可怕的内存空间浪费,执行需要数天 ! (请记住,我需要运行此程序L = 1000和L = 5000,如果可能,甚至L = 10000为我的目的)

必须有一些其他更有效的方法来做到这一点。 但我不知道是什么。 任何帮助表示赞赏。

啊,一个非常接近我自己心的话题! 🙂

因为您正在收集统计数据,并且确切的配置仅在收集所述统计数据的持续时间内有趣,您只需要两个L×L矩阵:一个用于保存颜色信息(一个白色,0到L×L黑色,所以一个类型可以保存0到L 2 +1之间的整数,包括端点); 另一个用于保存该类型的元素数(0到L 2之间的整数,包括0和L 2 )。

为了保持每个L和p值的每个大小的黑色簇的数量,在多次迭代中,您将需要第三个L×L + 1个元素的arrays; 但请注意,如果使用相同的L和p进行N次迭代,其值可以达到N×L×L。

标准的C伪随机数发生器很可怕; 不要使用它。 使用Mersenne Twister或我最喜欢的Xorshift64 * (或相关的Xorshift1024 *)。 虽然Xorshift64 *速度极快,但它的分布对于像你这样的模拟而言是足够“随机”的。

而不是只存储“黑色”或“白色”,而是存储“不感兴趣”或群集ID; 最初,所有(未连接的)黑人都有唯一的群集ID。 这样,您可以实现不相交的集合 ,并将连接的单元格非常有效地连接到集群。

以下是我在伪代码中实现的方法:

 Let id[L*L] be the color information; index is L*row+col Let idcount[L*L+1] be the id count over one iteration Let count[L*L+1] be the number of clusters across N iterations Let limit = (p / 100.0) * PRNG_MAX Let white = L*L (and blacks are 0 to L*L-1, inclusive) Clear count[] to all zeros For iter = 1 to N, inclusive: For row = 0 to L-1, inclusive: If PRNG() <= limit: Let id[L*row] = L*row Else: Let id[L*row] = white End If For col = 1 to L-1, inclusive: If PRNG() <= limit: If id[L*row+col-1] == white: Let id[L*row+col] = L*row + col Else: Let id[L*row+col] = id[L*row+col-1] End If Else: Let id[L*row+col] = white End If End For End For 

请注意,我在生成单元ID时执行水平连接传递,只需对连续的黑色单元重用相同的ID即可。 此时,如果PRNG()返回0和PRNG_MAX之间的无符号整数(包括0和PRNG_MAX PRNG() ,则id[][]现在以p %的概率填充各种ID的黑色单元格。

接下来,您进行连接传递。 由于水平连接的单元格已经连接,因此您需要执行垂直和两个对角线。 请注意,加入时应使用路径展平,以​​提高效率。

  For row = 0 to L-2, inclusive: For col = 0 to L-1, inclusive: If (id[L*row+col] != white) And (id[L*row+L+col] != white): Join id[L*row+col] and id[L*row+L+col] End If End For End For For row = 0 to L-2, inclusive: For col = 0 to L-2, inclusive: If (id[L*row+col] != white) And (id[L*row+L+col+1] != white): Join id[L*row+col] and id[L*row+L+col+1] End If End For End For For row = 1 to L-1, inclusive: For col = 0 to L-2, inclusive: If (id[L*row+col] != white) And (id[L*row-L+col+1] != white): Join id[L*row+col] and id[L*row-L+col+1] End If End For End For 

当然,您应该组合循环,以保持最佳的访问位置。 (分别执行col == 0 ,并在单独的内循环中使用row == 0 ,最小化if子句,因为这些子句往往很慢。)你甚至可以使id数组(L + 1) 2的大小,填充外部具有白色的细胞边缘,以简化上述连接成单个双环,以4L + 4个额外细胞的成本使用。

此时,您需要展平每个不相交的集合。 非white每个ID值都是L*row+col (表示“this”),或者是对另一个单元格的引用。 展平意味着对于每个ID,我们在链中找到最终的ID,并使用它代替:

  For i = 0 to L*L-1, inclusive: If (id[i] != white): Let k = i While (id[k] != k): k = id[k] End While id[i] = k End If End For 

接下来,我们需要计算具有特定ID的单元格数:

  Clear idn[] For i = 0 to L*L-1, inclusive: Increment idn[id[i]] End For 

因为我们对N次迭代中特定大小的簇的数量感兴趣,所以我们可以简单地通过在此迭代中找到的每个特定大小的簇更新count数组:

  For i = 1 to L*L, inclusive: Increment count[idn[i]] End For Let count[0] = 0 End For 

此时count[i]包含在N次迭代中找到的i单元的黑色簇的数量; 换句话说,它是在N次迭代期间看到的簇大小的直方图(离散分布)。


上述的一种实现可以如下。

(第一个实现有数组大小和集群标签的问题;这个版本将该部分拆分成一个单独的文件,并且更容易validation是否正确。但是,这只是第二个版本,所以我不认为它是生产 - 质量。)

首先,操作矩阵的函数cluster.h

 #ifndef CLUSTER_H #define CLUSTER_H #include  #include  #include  #include  #include  /* This is in the public domain. Written by Nominal Animal  */ typedef uint32_t cluster_label; typedef uint32_t cluster_size; static size_t rows = 0; /* Number of mutable rows */ static size_t cols = 0; /* Number of mutable columns */ static size_t nrows = 0; /* Number of accessible rows == rows + 1 */ static size_t ncols = 0; /* Number of accessible columns == cols + 1 */ static size_t cells = 0; /* Total number of cells == nrows*ncols */ static size_t empty = 0; /* Highest-numbered label == nrows*ncols - 1 */ static size_t sizes = 0; /* Number of possible cluster sizes == rows*cols + 1 */ #define INDEX(row, col) (ncols*(row) + (col)) cluster_label *label = NULL; /* 2D contents: label[ncols*(row) + (col)] */ cluster_size *occurs = NULL; /* Number of occurrences of each label value */ /* Xorshift64* PRNG used for generating the matrix. */ static uint64_t prng_state = 1; static inline uint64_t prng_u64(void) { uint64_t state = prng_state; state ^= state >> 12; state ^= state << 25; state ^= state >> 27; prng_state = state; return state * UINT64_C(2685821657736338717); } static inline uint64_t prng_u64_less(void) { uint64_t result; do { result = prng_u64(); } while (result == UINT64_C(18446744073709551615)); return result; } static inline uint64_t prng_seed(const uint64_t seed) { if (seed) prng_state = seed; else prng_state = 1; return prng_state; } static inline uint64_t prng_randomize(void) { int discard = 1024; uint64_t seed; FILE *in; /* By default, use time. */ prng_seed( ((uint64_t)time(NULL) * 2832631) ^ ((uint64_t)clock() * 1120939) ); #ifdef __linux__ /* On Linux, read from /dev/urandom. */ in = fopen("/dev/urandom", "r"); if (in) { if (fread(&seed, sizeof seed, 1, in) == 1) prng_seed(seed); fclose(in); } #endif /* Churn the state a bit. */ seed = prng_state; while (discard-->0) { seed ^= seed >> 12; seed ^= seed << 25; seed ^= seed >> 27; } return prng_state = seed; } static inline void cluster_free(void) { free(occurs); occurs = NULL; free(label); label = NULL; rows = 0; cols = 0; nrows = 0; ncols = 0; cells = 0; empty = 0; sizes = 0; } static int cluster_init(const size_t want_cols, const size_t want_rows) { cluster_free(); if (want_cols < 1 || want_rows < 1) return -1; /* Invalid number of rows or columns */ rows = want_rows; cols = want_cols; nrows = rows + 1; ncols = cols + 1; cells = nrows * ncols; if ((size_t)(cells / nrows) != ncols || nrows <= rows || ncols <= cols) return -1; /* Size overflows */ empty = nrows * ncols - 1; sizes = rows * cols + 1; label = calloc(cells, sizeof label[0]); occurs = calloc(cells, sizeof occurs[0]); if (!label || !occurs) { cluster_free(); return -1; /* Not enough memory. */ } return 0; } static void join2(size_t i1, size_t i2) { size_t root1 = i1, root2 = i2, root; while (root1 != label[root1]) root1 = label[root1]; while (root2 != label[root2]) root2 = label[root2]; root = root1; if (root > root2) root = root2; while (label[i1] != root) { const size_t temp = label[i1]; label[i1] = root; i1 = temp; } while (label[i2] != root) { const size_t temp = label[i2]; label[i2] = root; i2 = temp; } } static void join3(size_t i1, size_t i2, size_t i3) { size_t root1 = i1, root2 = i2, root3 = i3, root; while (root1 != label[root1]) root1 = label[root1]; while (root2 != label[root2]) root2 = label[root2]; while (root3 != label[root3]) root3 = label[root3]; root = root1; if (root > root2) root = root2; if (root > root3) root = root3; while (label[i1] != root) { const size_t temp = label[i1]; label[i1] = root; i1 = temp; } while (label[i2] != root) { const size_t temp = label[i2]; label[i2] = root; i2 = temp; } while (label[i3] != root) { const size_t temp = label[i3]; label[i3] = root; i3 = temp; } } static void join4(size_t i1, size_t i2, size_t i3, size_t i4) { size_t root1 = i1, root2 = i2, root3 = i3, root4 = i4, root; while (root1 != label[root1]) root1 = label[root1]; while (root2 != label[root2]) root2 = label[root2]; while (root3 != label[root3]) root3 = label[root3]; while (root4 != label[root4]) root4 = label[root4]; root = root1; if (root > root2) root = root2; if (root > root3) root = root3; if (root > root4) root = root4; while (label[i1] != root) { const size_t temp = label[i1]; label[i1] = root; i1 = temp; } while (label[i2] != root) { const size_t temp = label[i2]; label[i2] = root; i2 = temp; } while (label[i3] != root) { const size_t temp = label[i3]; label[i3] = root; i3 = temp; } while (label[i4] != root) { const size_t temp = label[i4]; label[i4] = root; i4 = temp; } } static void cluster_fill(const uint64_t plimit) { size_t r, c; /* Fill the matrix with the specified probability. For efficiency, we use the same label for all horizontally consecutive cells. */ for (r = 0; r < rows; r++) { const size_t imax = ncols*r + cols; cluster_label last; size_t i = ncols*r; last = (prng_u64_less() < plimit) ? i : empty; label[i] = last; while (++i < imax) { if (prng_u64_less() < plimit) { if (last == empty) last = i; } else last = empty; label[i] = last; } label[imax] = empty; } /* Fill the extra row with empty, too. */ { cluster_label *ptr = label + ncols*rows; cluster_label *const end = label + ncols*nrows; while (ptr < end) *(ptr++) = empty; } /* On the very first row, we need to join non-empty cells vertically and diagonally down. */ for (c = 0; c < cols; c++) switch ( ((label[c] < empty) << 0) | ((label[c+ncols] < empty) << 1) | ((label[c+ncols+1] < empty) << 2) ) { /* <1> * * 2 4 */ case 3: /* Join down */ join2(c, c+ncols); break; case 5: /* Join down right */ join2(c, c+ncols+1); break; case 7: /* Join down and down right */ join3(c, c+ncols, c+ncols+1); break; } /* On the other rows, we need to join non-empty cells vertically, diagonally down, and diagonally up. */ for (r = 1; r < rows; r++) { const size_t imax = ncols*r + cols; size_t i; for (i = ncols*r; i < imax; i++) switch ( ((label[i] < empty) << 0) | ((label[i-ncols+1] < empty) << 1) | ((label[i+ncols] < empty) << 2) | ((label[i+ncols+1] < empty) << 3) ) { /* 2 * * <1> * * 4 8 */ case 3: /* Diagonally up */ join2(i, i-ncols+1); break; case 5: /* Down */ join2(i, i+ncols); break; case 7: /* Down and diagonally up */ join3(i, i-ncols+1, i+ncols); break; case 9: /* Diagonally down */ join2(i, i+ncols+1); break; case 11: /* Diagonally up and diagonally down */ join3(i, i-ncols+1, i+ncols+1); break; case 13: /* Down and diagonally down */ join3(i, i+ncols, i+ncols+1); break; case 15: /* Down, diagonally down, and diagonally up */ join4(i, i-ncols+1, i+ncols, i+ncols+1); break; } } /* Flatten the labels, so that all cells belonging to the same cluster will have the same label. */ { const size_t imax = ncols*rows; size_t i; for (i = 0; i < imax; i++) if (label[i] < empty) { size_t k = i, kroot = i; while (kroot != label[kroot]) kroot = label[kroot]; while (label[k] != kroot) { const size_t temp = label[k]; label[k] = kroot; k = temp; } } } /* Count the number of occurrences of each label. */ memset(occurs, 0, (empty + 1) * sizeof occurs[0]); { cluster_label *const end = label + ncols*rows; cluster_label *ptr = label; while (ptr < end) ++occurs[*(ptr++)]; } } #endif /* CLUSTER_H */ 

请注意,要获得完整的概率范围, prng_u64_less()函数永远不会返回尽可能高的uint64_t 。 然而,这种做法有点过分,精确。

main.c解析输入参数,迭代并打印结果:

 #include  #include  #include  #include "cluster.h" /* This program is in the public domain. Written by Nominal Animal  */ int main(int argc, char *argv[]) { uint64_t *count = NULL; uint64_t plimit; unsigned long size, iterations, i; double probability; char dummy; if (argc != 4) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s SIZE ITERATIONS PROBABILITY\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Where SIZE sets the matrix size (SIZE x SIZE),\n"); fprintf(stderr, " ITERATIONS is the number of iterations, and\n"); fprintf(stderr, " PROBABILITY is the probability [0, 1] of a cell\n"); fprintf(stderr, " being non-empty.\n"); fprintf(stderr, "\n"); return EXIT_FAILURE; } if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) { fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]); return EXIT_FAILURE; } if (sscanf(argv[2], " %lu %c", &iterations, &dummy) != 1 || iterations < 1u) { fprintf(stderr, "%s: Invalid number of iterations.\n", argv[2]); return EXIT_FAILURE; } if (sscanf(argv[3], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) { fprintf(stderr, "%s: Invalid probability.\n", argv[3]); return EXIT_FAILURE; } /* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p), but doubles do not have the precision for that. */ if (probability > 0.9999999999999999) plimit = UINT64_C(18446744073709551615); else if (probability <= 0) plimit = UINT64_C(0); else plimit = (uint64_t)(18446744073709551616.0 * probability); /* Try allocating memory for the matrix and the label count array. */ if (size > 2147483646u || cluster_init(size, size)) { fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]); return EXIT_FAILURE; } /* Try allocating memory for the cluster size histogram. */ count = calloc(sizes + 1, sizeof count[0]); if (!count) { fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]); return EXIT_FAILURE; } printf("# Xorshift64* seed is %" PRIu64 "\n", prng_randomize()); printf("# Matrix size is %lu x %lu\n", size, size); printf("# Probability of a cell to be non-empty is %.6f%%\n", 100.0 * ((double)plimit / 18446744073709551615.0)); printf("# Collecting statistics over %lu iterations\n", iterations); printf("# Size Count CountPerIteration\n"); fflush(stdout); for (i = 0u; i < iterations; i++) { size_t k = empty; cluster_fill(plimit); /* Add cluster sizes to the histogram. */ while (k-->0) ++count[occurs[k]]; } /* Print the histogram of cluster sizes. */ { size_t k = 1; printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations); for (k = 2; k < sizes; k++) if (count[k-1] != 0 || count[k] != 0 || count[k+1] != 0) printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations); } return EXIT_SUCCESS; } 

程序以Gnuplot兼容格式输出簇大小分布(直方图),从以#开头的几条注释行开始,以指示用于计算结果的确切参数。

输出中的第一列是簇大小(簇中的单元格数),第二列是迭代期间找到的此类簇的精确计数,第三列是观察到的发生概率(即,总计数)出现次数除以迭代次数)。

以下是L = 1000,N = 1000的簇大小分布与p的几个不同值相似: 群集大小分布 请注意,两个轴都具有对数缩放,因此它是对数 - 对数图。 它是通过运行上述程序几次生成的,每个唯一的p运行一次,并将输出保存到文件(包含p值的名称),然后将它们打印到Gnuplot中的单个图中。

为了validation聚类function,我编写了一个测试程序,它生成一个矩阵,为每个聚类分配随机颜色,并将其输出为PPM图像(可以使用NetPBM工具或基本上任何图像处理程序进行操作); ppm.c

 #include  #include  #include  #include "cluster.h" /* This program is in the public domain. Written by Nominal Animal  */ int main(int argc, char *argv[]) { uint64_t plimit; uint32_t *color; unsigned long size, r, c; double probability; char dummy; if (argc != 3) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s SIZE PROBABILITY > matrix.ppm\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Where SIZE sets the matrix size (SIZE x SIZE),\n"); fprintf(stderr, " PROBABILITY is the probability [0, 1] of a cell\n"); fprintf(stderr, " being non-empty.\n"); fprintf(stderr, "\n"); return EXIT_FAILURE; } if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) { fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]); return EXIT_FAILURE; } if (sscanf(argv[2], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) { fprintf(stderr, "%s: Invalid probability.\n", argv[2]); return EXIT_FAILURE; } /* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p), but doubles do not have the precision for that. */ if (probability > 0.9999999999999999) plimit = UINT64_C(18446744073709551615); else if (probability <= 0) plimit = UINT64_C(0); else plimit = (uint64_t)(18446744073709551616.0 * probability); /* Try allocating memory for the matrix and the label count array. */ if (size > 2147483646u || cluster_init(size, size)) { fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]); return EXIT_FAILURE; } /* Try allocating memory for the color lookup array. */ color = calloc(empty + 1, sizeof color[0]); if (!color) { fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]); return EXIT_FAILURE; } fprintf(stderr, "Using Xorshift64* seed %" PRIu64 "\n", prng_randomize()); fflush(stderr); /* Construct the matrix. */ cluster_fill(plimit); { size_t i; color[empty] = 0xFFFFFFu; /* Assign random colors. */ for (i = 0; i < empty; i++) color[i] = prng_u64() >> 40; } printf("P6\n%lu %lu 255\n", size, size); for (r = 0; r < rows; r++) for (c = 0; c < cols; c++) { const uint32_t rgb = color[label[ncols*r + c]]; fputc((rgb >> 16) & 255, stdout); fputc((rgb >> 8) & 255, stdout); fputc( rgb & 255, stdout); } fflush(stdout); fprintf(stderr, "Done.\n"); return EXIT_SUCCESS; } 

这是p = 40%的256x256矩阵看起来像: 256x256矩阵,p = 40%

请注意,白色背景不计为群集。

这是我的另一个答案的补充,显示了一个分别计算白色(0)和黑色(1​​)聚类的示例程序。 这太长了,不适合单一的答案。

我们的想法是,我们使用单独的地图来表示每个单元格的颜色。 此贴图还包含边框单元格,其颜色与实际矩阵中的任何颜色都不匹配。 不相交集是一个单独的数组,每个矩阵单元有一个条目。

为了跟踪这两种颜色,我们在计算每个不相交集合根的出现次数时,为每种颜色使用单独的数组。 (或者换句话说,矩阵中每个簇的大小。)

收集统计信息时,群集大小将更新为每种颜色的单独直方图。 即使内存消耗很大,这种方案似乎也非常有效。 ( R行和C列的矩阵需要大约25*R*C + R + 2*C (加上常量)字节,包括直方图。)

这是头文件clusters.h ,它实现了所有的计算逻辑:

 #ifndef CLUSTERS_H #define CLUSTERS_H #include  #include  #include  #include  /* This file is in public domain. No guarantees, no warranties. Written by Nominal Animal . */ /* For pure C89 compilers, use '-DSTATIC_INLINE=static' at compile time. */ #ifndef STATIC_INLINE #define STATIC_INLINE static inline #endif #define ERR_INVALID -1 /* Invalid function parameter */ #define ERR_TOOLARGE -2 /* Matrix size is too large */ #define ERR_NOMEM -3 /* Out of memory */ typedef unsigned char cluster_color; typedef uint32_t cluster_label; typedef uint64_t cluster_count; #define CLUSTER_WHITE 0 #define CLUSTER_BLACK 1 #define CLUSTER_NONE UCHAR_MAX /* Reserved */ #define FMT_COLOR "u" #define FMT_LABEL PRIu32 #define FMT_COUNT PRIu64 typedef struct { uint64_t state; } prng; typedef struct { /* Pseudo-random number generator used */ prng rng; /* Actual size of the matrix */ cluster_label rows; cluster_label cols; /* Number of matrices the histograms have been collected from */ cluster_count iterations; /* Probability of each cell being black */ uint64_t p_black; /* Probability of diagonal connections */ uint64_t d_black; uint64_t d_white; /* Cluster colormap contains (rows+2) rows and (cols+1) columns */ cluster_color *map; /* Disjoint set of (rows) rows and (cols) columns */ cluster_label *djs; /* Number of occurrences per disjoint set root */ cluster_label *white_roots; cluster_label *black_roots; /* Histograms of white and black clusters */ cluster_count *white_histogram; cluster_count *black_histogram; } cluster; #define CLUSTER_INITIALIZER { {0}, 0, 0, 0, 0.0, 0.0, 0.0, NULL, NULL, NULL, NULL, NULL, NULL } /* Calculate uint64_t limit corresponding to probability p. */ STATIC_INLINE uint64_t probability_limit(const double p) { if (p <= 0.0) return UINT64_C(0); else if (p <= 0.5) return (uint64_t)(p * 18446744073709551615.0); else if (p >= 1.0) return UINT64_C(18446744073709551615); else return UINT64_C(18446744073709551615) - (uint64_t)((double)(1.0 - p) * 18446744073709551615.0); } /* Return true at probability corresponding to limit 'limit'. This implements a Xorshift64* pseudo-random number generator. */ STATIC_INLINE int probability(prng *const rng, const uint64_t limit) { uint64_t state = rng->state; uint64_t value; /* To correctly cover the entire range, we ensure we never generate a zero. */ do { state ^= state >> 12; state ^= state << 25; state ^= state >> 27; value = state * UINT64_C(2685821657736338717); } while (!value); rng->state = state; return (value <= limit) ? CLUSTER_BLACK : CLUSTER_WHITE; } /* Generate a random seed for the Xorshift64* pseudo-random number generator. */ static uint64_t randomize(prng *const rng) { unsigned int rounds = 127; uint64_t state = UINT64_C(3069887672279) * (uint64_t)time(NULL) ^ UINT64_C(60498839) * (uint64_t)clock(); if (!state) state = 1; /* Churn the state a bit. */ while (rounds-->0) { state ^= state >> 12; state ^= state << 25; state ^= state >> 27; } if (rng) rng->state = state; return state; } /* Free all resources related to a cluster. */ STATIC_INLINE void free_cluster(cluster *c) { if (c) { /* Free dynamically allocated pointers. Note: free(NULL) is safe. */ free(c->map); free(c->djs); free(c->white_roots); free(c->black_roots); free(c->white_histogram); free(c->black_histogram); c->rng.state = 0; c->rows = 0; c->cols = 0; c->iterations = 0; c->p_black = 0; c->d_white = 0; c->d_black = 0; c->map = NULL; c->djs = NULL; c->white_roots = 0; c->black_roots = 0; c->white_histogram = NULL; c->black_histogram = NULL; } } /* Initialize cluster structure, for a matrix of specified size. */ static int init_cluster(cluster *c, const int rows, const int cols, const double p_black, const double d_white, const double d_black) { const cluster_label label_cols = cols; const cluster_label label_rows = rows; const cluster_label color_rows = rows + 2; const cluster_label color_cols = cols + 1; const cluster_label color_cells = color_rows * color_cols; const cluster_label label_cells = label_rows * label_cols; const cluster_label labels = label_cells + 2; /* One extra! */ if (!c) return ERR_INVALID; c->rng.state = 0; /* Invalid seed for Xorshift64*. */ c->rows = 0; c->cols = 0; c->iterations = 0; c->p_black = 0; c->d_white = 0; c->d_black = 0; c->map = NULL; c->djs = NULL; c->white_roots = NULL; c->black_roots = NULL; c->white_histogram = NULL; c->black_histogram = NULL; if (rows < 1 || cols < 1) return ERR_INVALID; if ((unsigned int)color_rows <= (unsigned int)rows || (unsigned int)color_cols <= (unsigned int)cols || (cluster_label)(color_cells / color_rows) != color_cols || (cluster_label)(color_cells / color_cols) != color_rows || (cluster_label)(label_cells / label_rows) != label_cols || (cluster_label)(label_cells / label_cols) != label_rows) return ERR_TOOLARGE; c->black_histogram = calloc(labels, sizeof (cluster_count)); c->white_histogram = calloc(labels, sizeof (cluster_count)); c->black_roots = calloc(labels, sizeof (cluster_label)); c->white_roots = calloc(labels, sizeof (cluster_label)); c->djs = calloc(label_cells, sizeof (cluster_label)); c->map = calloc(color_cells, sizeof (cluster_color)); if (!c->map || !c->djs || !c->white_roots || !c->black_roots || !c->white_histogram || !c->black_histogram) { free(c->map); free(c->djs); free(c->white_roots); free(c->black_roots); free(c->white_histogram); free(c->black_histogram); return ERR_NOMEM; } c->rows = rows; c->cols = cols; c->p_black = probability_limit(p_black); c->d_white = probability_limit(d_white); c->d_black = probability_limit(d_black); /* Initialize the color map to NONE. */ { cluster_color *ptr = c->map; cluster_color *const end = c->map + color_cells; while (ptr < end) *(ptr++) = CLUSTER_NONE; } /* calloc() initialized the other arrays to zeros already. */ return 0; } /* Disjoint set: find root. */ STATIC_INLINE cluster_label djs_root(const cluster_label *const djs, cluster_label from) { while (from != djs[from]) from = djs[from]; return from; } /* Disjoint set: path compression. */ STATIC_INLINE void djs_path(cluster_label *const djs, cluster_label from, const cluster_label root) { while (from != root) { const cluster_label temp = djs[from]; djs[from] = root; from = temp; } } /* Disjoint set: Flatten. Returns the root, and flattens the path to it. */ STATIC_INLINE cluster_label djs_flatten(cluster_label *const djs, cluster_label from) { const cluster_label root = djs_root(djs, from); djs_path(djs, from, root); return root; } /* Disjoint set: Join two subsets. */ STATIC_INLINE void djs_join2(cluster_label *const djs, cluster_label from1, cluster_label from2) { cluster_label root, temp; root = djs_root(djs, from1); temp = djs_root(djs, from2); if (root > temp) temp = root; djs_path(djs, from1, root); djs_path(djs, from2, root); } /* Disjoint set: Join three subsets. */ STATIC_INLINE void djs_join3(cluster_label *const djs, cluster_label from1, cluster_label from2, cluster_label from3) { cluster_label root, temp; root = djs_root(djs, from1); temp = djs_root(djs, from2); if (root > temp) root = temp; temp = djs_root(djs, from3); if (root > temp) root = temp; djs_path(djs, from1, root); djs_path(djs, from2, root); djs_path(djs, from3, root); } /* Disjoint set: Join four subsets. */ STATIC_INLINE void djs_join4(cluster_label *const djs, cluster_label from1, cluster_label from2, cluster_label from3, cluster_label from4) { cluster_label root, temp; root = djs_root(djs, from1); temp = djs_root(djs, from2); if (root > temp) root = temp; temp = djs_root(djs, from3); if (root > temp) root = temp; temp = djs_root(djs, from4); if (root > temp) root = temp; djs_path(djs, from1, root); djs_path(djs, from2, root); djs_path(djs, from3, root); djs_path(djs, from4, root); } /* Disjoint set: Join five subsets. */ STATIC_INLINE void djs_join5(cluster_label *const djs, cluster_label from1, cluster_label from2, cluster_label from3, cluster_label from4, cluster_label from5) { cluster_label root, temp; root = djs_root(djs, from1); temp = djs_root(djs, from2); if (root > temp) root = temp; temp = djs_root(djs, from3); if (root > temp) root = temp; temp = djs_root(djs, from4); if (root > temp) root = temp; temp = djs_root(djs, from5); if (root > temp) root = temp; djs_path(djs, from1, root); djs_path(djs, from2, root); djs_path(djs, from3, root); djs_path(djs, from4, root); djs_path(djs, from5, root); } static void iterate(cluster *const cl) { prng *const rng = &(cl->rng); uint64_t const p_black = cl->p_black; uint64_t d_color[2]; cluster_color *const map = cl->map + cl->cols + 2; cluster_label const map_stride = cl->cols + 1; cluster_label *const djs = cl->djs; cluster_label *roots[2]; cluster_label const rows = cl->rows; cluster_label const cols = cl->cols; int r, c; d_color[CLUSTER_WHITE] = cl->d_white; d_color[CLUSTER_BLACK] = cl->d_black; roots[CLUSTER_WHITE] = cl->white_roots; roots[CLUSTER_BLACK] = cl->black_roots; for (r = 0; r < rows; r++) { cluster_label const curr_i = r * cols; cluster_color *const curr_row = map + r * map_stride; cluster_color *const prev_row = curr_row - map_stride; for (c = 0; c < cols; c++) { cluster_color color = probability(rng, p_black); cluster_label label = curr_i + c; uint64_t diag = d_color[color]; unsigned int joins = 0; /* Assign the label and color of the current cell, */ djs[label] = label; curr_row[c] = color; /* Because we join left, up-left, up, and up-right, and all those have been assigned to already, we can do the necessary joins right now. */ /* Join left? */ joins |= (curr_row[c-1] == color) << 0; /* Join up? */ joins |= (prev_row[c ] == color) << 1; /* Join up left? */ joins |= (prev_row[c-1] == color && probability(rng, diag)) << 2; /* Join up right? */ joins |= (prev_row[c+1] == color && probability(rng, diag)) << 3; /* Do the corresponding joins. */ switch (joins) { case 1: /* Left */ djs_join2(djs, label, label - 1); break; case 2: /* Up */ djs_join2(djs, label, label - cols); break; case 3: /* Left and up */ djs_join3(djs, label, label - 1, label - cols); break; case 4: /* Up-left */ djs_join2(djs, label, label - cols - 1); break; case 5: /* Left and up-left */ djs_join3(djs, label, label - 1, label - cols - 1); break; case 6: /* Up and up-left */ djs_join3(djs, label, label - cols, label - cols - 1); break; case 7: /* Left, up, and up-left */ djs_join4(djs, label, label - 1, label - cols, label - cols - 1); break; case 8: /* Up-right */ djs_join2(djs, label, label - cols + 1); break; case 9: /* Left and up-right */ djs_join3(djs, label, label - 1, label - cols + 1); break; case 10: /* Up and up-right */ djs_join3(djs, label, label - cols, label - cols + 1); break; case 11: /* Left, up, and up-right */ djs_join4(djs, label, label - 1, label - cols, label - cols + 1); break; case 12: /* Up-left and up-right */ djs_join3(djs, label, label - cols - 1, label - cols + 1); break; case 13: /* Left, up-left, and up-right */ djs_join4(djs, label, label - 1, label - cols - 1, label - cols + 1); break; case 14: /* Up, up-left, and up-right */ djs_join4(djs, label, label - cols, label - cols - 1, label - cols + 1); break; case 15: /* Left, up, up-left, and up-right */ djs_join5(djs, label, label - 1, label - cols, label - cols - 1, label - cols + 1); break; } } } /* Count the occurrences of each disjoint-set root label. */ if (roots[0] && roots[1]) { const size_t labels = rows * cols + 2; /* Clear the counts. */ memset(roots[0], 0, labels * sizeof (cluster_label)); memset(roots[1], 0, labels * sizeof (cluster_label)); for (r = 0; r < rows; r++) { const cluster_color *const curr_row = map + r * map_stride; const cluster_label curr_i = r * cols; for (c = 0; c < cols; c++) roots[curr_row[c]][djs_flatten(djs, curr_i + c)]++; } } else { size_t i = rows * cols; while (i-->0) djs_flatten(djs, i); } /* Collect the statistics. */ if (cl->white_histogram && roots[0]) { const cluster_label *const root_count = roots[0]; cluster_count *const histogram = cl->white_histogram; size_t i = rows * cols + 1; while (i-->0) histogram[root_count[i]]++; } if (cl->black_histogram && roots[1]) { const cluster_label *const root_count = roots[1]; cluster_count *const histogram = cl->black_histogram; size_t i = rows * cols + 1; while (i-->0) histogram[root_count[i]]++; } /* Note: index zero and (rows*cols+1) are zero in the histogram, for ease of scanning. */ if (cl->white_histogram || cl->black_histogram) { const size_t n = rows * cols + 1; if (cl->white_histogram) { cl->white_histogram[0] = 0; cl->white_histogram[n] = 0; } if (cl->black_histogram) { cl->black_histogram[0] = 0; cl->black_histogram[n] = 0; } } /* One more iteration performed. */ cl->iterations++; } #endif /* CLUSTERS_H */ 

Here is an example program, distribution.c , that collects cluster size statistics over a number of iterations, and outputs them in format suitable for plotting with for example Gnuplot. Run it without arguments to see usage and help.

 #include  #include  #include  #include  #include "clusters.h" /* This file is in public domain. No guarantees, no warranties. Written by Nominal Animal . */ #define DEFAULT_ROWS 100 #define DEFAULT_COLS 100 #define DEFAULT_P_BLACK 0.0 #define DEFAULT_D_WHITE 0.0 #define DEFAULT_D_BLACK 0.0 #define DEFAULT_ITERS 1 int usage(const char *argv0) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0); fprintf(stderr, " %s OPTIONS [ > output.txt ]\n", argv0); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " rows=SIZE Set number of rows. Default is %d.\n", DEFAULT_ROWS); fprintf(stderr, " cols=SIZE Set number of columns. Default is %d.\n", DEFAULT_ROWS); fprintf(stderr, " L=SIZE Set rows=SIZE and cols=SIZE.\n"); fprintf(stderr, " black=P Set the probability of a cell to be black. Default is %g.\n", DEFAULT_P_BLACK); fprintf(stderr, " All non-black cells are white.\n"); fprintf(stderr, " dwhite=P Set the probability of white cells connecting diagonally.\n"); fprintf(stderr, " Default is %g.\n", DEFAULT_D_WHITE); fprintf(stderr, " dblack=P Set the probability of black cells connecting diagonally.\n"); fprintf(stderr, " Default is %g.\n", DEFAULT_D_BLACK); fprintf(stderr, " N=COUNT Number of iterations for gathering statistics. Default is %d.\n", DEFAULT_ITERS); fprintf(stderr, " seed=U64 Set the Xorshift64* pseudorandom number generator seed; nonzero.\n"); fprintf(stderr, " Default is to pick one randomly (based on time).\n"); fprintf(stderr, "\n"); fprintf(stderr, "The output consists of comment lines and data lines.\n"); fprintf(stderr, "Comment lines begin with a #:\n"); fprintf(stderr, " # This is a comment line.\n"); fprintf(stderr, "Each data line contains a cluster size, the number of white clusters of that size\n"); fprintf(stderr, "observed during iterations, the number of black clusters of that size observed\n"); fprintf(stderr, "during iterations, and the number of any clusters of that size observed:\n"); fprintf(stderr, " SIZE WHITE_CLUSTERS BLACK_CLUSTERS TOTAL_CLUSTERS\n"); fprintf(stderr, "\n"); return EXIT_SUCCESS; } int main(int argc, char *argv[]) { int rows = DEFAULT_ROWS; int cols = DEFAULT_COLS; double p_black = DEFAULT_P_BLACK; double d_white = DEFAULT_D_WHITE; double d_black = DEFAULT_D_BLACK; long iters = DEFAULT_ITERS; uint64_t seed = 0; cluster c = CLUSTER_INITIALIZER; int arg, itemp; uint64_t u64temp; double dtemp; long ltemp; char dummy; size_t n; size_t i; if (argc < 2) return usage(argv[0]); for (arg = 1; arg < argc; arg++) if (!strcmp(argv[arg], "-h") || !strcmp(argv[arg], "/?") || !strcmp(argv[arg], "--help")) return usage(argv[0]); else if (sscanf(argv[arg], "L=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "l=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "size=%d %c", &itemp, &dummy) == 1) { rows = itemp; cols = itemp; } else if (sscanf(argv[arg], "seed=%" SCNu64 " %c", &u64temp, &dummy) == 1 || sscanf(argv[arg], "seed=%" SCNx64 " %c", &u64temp, &dummy) == 1 || sscanf(argv[arg], "s=%" SCNu64 " %c", &u64temp, &dummy) == 1 || sscanf(argv[arg], "s=%" SCNx64 " %c", &u64temp, &dummy) == 1) { seed = u64temp; } else if (sscanf(argv[arg], "N=%ld %c", &ltemp, &dummy) == 1 || sscanf(argv[arg], "n=%ld %c", &ltemp, &dummy) == 1 || sscanf(argv[arg], "count=%ld %c", &ltemp, &dummy) == 1) { iters = ltemp; } else if (sscanf(argv[arg], "rows=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "r=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "height=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "h=%d %c", &itemp, &dummy) == 1) { rows = itemp; } else if (sscanf(argv[arg], "columns=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "cols=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "c=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "width=%d %c", &itemp, &dummy) == 1 || sscanf(argv[arg], "w=%d %c", &itemp, &dummy) == 1) { cols = itemp; } else if (sscanf(argv[arg], "black=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "b=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "P=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "p=%lf %c", &dtemp, &dummy) == 1) { p_black = dtemp; } else if (sscanf(argv[arg], "white=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "p1=%lf %c", &dtemp, &dummy) == 1) { p_black = 1.0 - dtemp; } else if (sscanf(argv[arg], "dwhite=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "dw=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "d0=%lf %c", &dtemp, &dummy) == 1) { d_white = dtemp; } else if (sscanf(argv[arg], "dblack=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "db=%lf %c", &dtemp, &dummy) == 1 || sscanf(argv[arg], "d1=%lf %c", &dtemp, &dummy) == 1) { d_black = dtemp; } else { fprintf(stderr, "%s: Unknown option.\n", argv[arg]); return EXIT_FAILURE; } switch (init_cluster(&c, rows, cols, p_black, d_white, d_black)) { case 0: break; /* OK */ case ERR_INVALID: fprintf(stderr, "Invalid size.\n"); return EXIT_FAILURE; case ERR_TOOLARGE: fprintf(stderr, "Size is too large.\n"); return EXIT_FAILURE; case ERR_NOMEM: fprintf(stderr, "Not enough memory.\n"); return EXIT_FAILURE; } if (!seed) seed = randomize(NULL); c.rng.state = seed; /* The largest possible cluster has n cells. */ n = (size_t)rows * (size_t)cols; /* Print the comments describing the initial parameters. */ printf("# seed: %" PRIu64 " (Xorshift 64*)\n", seed); printf("# size: %d rows, %d columns\n", rows, cols); printf("# P(black): %.6f (%" PRIu64 "/18446744073709551615)\n", p_black, c.p_black); printf("# P(black connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_black, c.d_black); printf("# P(white connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_white, c.d_white); fflush(stdout); while (iters-->0) iterate(&c); printf("# Iterations: %" PRIu64 "\n", c.iterations); printf("#\n"); printf("# size white_clusters(size) black_clusters(size) clusters(size)\n"); /* Note: c._histogram[0] == c._histogram[n] == 0, for ease of scanning. */ for (i = 1; i <= n; i++) if (c.white_histogram[i-1] || c.white_histogram[i] || c.white_histogram[i+1] || c.black_histogram[i-1] || c.black_histogram[i] || c.black_histogram[i+1]) printf("%lu %" FMT_COUNT " %" FMT_COUNT " %" FMT_COUNT "\n", (unsigned long)i, c.white_histogram[i], c.black_histogram[i], c.white_histogram[i]+c.black_histogram[i]); /* Since we are exiting anyway, this is not really necessary. */ free_cluster(&c); /* All done. */ return EXIT_SUCCESS; } 

Here is a plot generated by running distribution.c with L=100 black=0.5 dwhite=0 dblack=0.25 seed=20120622 N=100000 (cluster size distributions collected from hundred thousand 100x100 matrices, where the probability of a nonzero cell is 50%, and there is a 25% probability of nonzero cells connecting diagonally, zero cells never connecting diagonally, only horizontally and vertically): Example cluster distribution As you can see, because nonzero clusters can sometimes connect diagonally too, there are more larger nonzero clusters than zero clusters. The computation took about 37 seconds on an i5-7200U laptop. Because the Xorshift64* seed is given explicitly, this is a repeatable test.