Static Value-Flow Analysis
fastcluster.cpp
Go to the documentation of this file.
1 //
2 // C++ standalone version of fastcluster by Daniel Müllner
3 //
4 // Copyright: Christoph Dalitz, 2020
5 // Daniel Müllner, 2011
6 // License: BSD style license
7 // (see the file LICENSE for details)
8 //
9 
10 
11 #include <assert.h>
12 #include <vector>
13 #include <algorithm>
14 
16 
17 // Code by Daniel Müllner
18 // workaround to make it usable as a standalone version (without R)
19 bool fc_isnan(double x)
20 {
21  return false;
22 }
23 #include "fastcluster_dm.cpp.inc"
24 #include "fastcluster_R_dm.cpp.inc"
25 
26 //
27 // Assigns cluster labels (0, ..., nclust-1) to the n points such
28 // that the cluster result is split into nclust clusters.
29 //
30 // Input arguments:
31 // n = number of observables
32 // merge = clustering result in R format
33 // nclust = number of clusters
34 // Output arguments:
35 // labels = allocated integer array of size n for result
36 //
37 void cutree_k(int n, const int* merge, int nclust, int* labels)
38 {
39 
40  int k,m1,m2,j,l;
41 
42  if (nclust > n || nclust < 2)
43  {
44  for (j=0; j<n; j++) labels[j] = 0;
45  return;
46  }
47 
48  // assign to each observable the number of its last merge step
49  // beware: indices of observables in merge start at 1 (R convention)
50  std::vector<int> last_merge(n, 0);
51  for (k=1; k<=(n-nclust); k++)
52  {
53  // (m1,m2) = merge[k,]
54  m1 = merge[k-1];
55  m2 = merge[n-1+k-1];
56  if (m1 < 0 && m2 < 0) // both single observables
57  {
58  last_merge[-m1-1] = last_merge[-m2-1] = k;
59  }
60  else if (m1 < 0 || m2 < 0) // one is a cluster
61  {
62  if(m1 < 0)
63  {
64  j = -m1;
65  m1 = m2;
66  }
67  else j = -m2;
68  // merging single observable and cluster
69  for(l = 0; l < n; l++)
70  if (last_merge[l] == m1)
71  last_merge[l] = k;
72  last_merge[j-1] = k;
73  }
74  else // both cluster
75  {
76  for(l=0; l < n; l++)
77  {
78  if( last_merge[l] == m1 || last_merge[l] == m2 )
79  last_merge[l] = k;
80  }
81  }
82  }
83 
84  // assign cluster labels
85  int label = 0;
86  std::vector<int> z(n,-1);
87  for (j=0; j<n; j++)
88  {
89  if (last_merge[j] == 0) // still singleton
90  {
91  labels[j] = label++;
92  }
93  else
94  {
95  if (z[last_merge[j]] < 0)
96  {
97  z[last_merge[j]] = label++;
98  }
99  labels[j] = z[last_merge[j]];
100  }
101  }
102 }
103 
104 //
105 // Assigns cluster labels (0, ..., nclust-1) to the n points such
106 // that the hierarchical clustering is stopped when cluster distance >= cdist
107 //
108 // Input arguments:
109 // n = number of observables
110 // merge = clustering result in R format
111 // height = cluster distance at each merge step
112 // cdist = cutoff cluster distance
113 // Output arguments:
114 // labels = allocated integer array of size n for result
115 //
116 void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels)
117 {
118 
119  int k;
120 
121  for (k=0; k<(n-1); k++)
122  {
123  if (height[k] >= cdist)
124  {
125  break;
126  }
127  }
128  cutree_k(n, merge, n-k, labels);
129 }
130 
131 
132 //
133 // Hierarchical clustering with one of Daniel Muellner's fast algorithms
134 //
135 // Input arguments:
136 // n = number of observables
137 // distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
138 // the upper triangle (without diagonal elements) of the distance
139 // matrix, e.g. for n=4:
140 // d00 d01 d02 d03
141 // d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
142 // d20 d21 d22 d23
143 // d30 d31 d32 d33
144 // method = cluster metric (see enum hclust_fast_methods)
145 // Output arguments:
146 // merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
147 // Result follows R hclust convention:
148 // - observable indices start with one
149 // - merge[i][] contains the merged nodes in step i
150 // - merge[i][j] is negative when the node is an atom
151 // height = allocated (n-1) array with distances at each merge step
152 // Return code:
153 // 0 = ok
154 // 1 = invalid method
155 //
156 int hclust_fast(int n, double* distmat, int method, int* merge, double* height)
157 {
158 
159  // call appropriate clustering function
160  cluster_result Z2(n-1);
161  if (method == HCLUST_METHOD_SINGLE)
162  {
163  // single link
164  MST_linkage_core(n, distmat, Z2);
165  }
166  else if (method == HCLUST_METHOD_COMPLETE)
167  {
168  // complete link
169  NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
170  }
171  else if (method == HCLUST_METHOD_AVERAGE)
172  {
173  // best average distance
174  double* members = new double[n];
175  for (int i=0; i<n; i++) members[i] = 1;
176  NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
177  delete[] members;
178  }
179  else if (method == HCLUST_METHOD_MEDIAN)
180  {
181  // best median distance (beware: O(n^3))
182  generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
183  }
184  else
185  {
186  return 1;
187  }
188 
189  int* order = new int[n];
190  if (method == HCLUST_METHOD_MEDIAN)
191  {
192  generate_R_dendrogram<true>(merge, height, order, Z2, n);
193  }
194  else
195  {
196  generate_R_dendrogram<false>(merge, height, order, Z2, n);
197  }
198 
199  delete[] order; // only needed for visualization
200 
201  return 0;
202 }
return NULL
Definition: cJSON.cpp:1173
cJSON * n
Definition: cJSON.cpp:2558
void cutree_k(int n, const int *merge, int nclust, int *labels)
Definition: fastcluster.cpp:37
bool fc_isnan(double x)
Definition: fastcluster.cpp:19
void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
@ HCLUST_METHOD_AVERAGE
Definition: fastcluster.h:72
@ HCLUST_METHOD_COMPLETE
Definition: fastcluster.h:70
@ HCLUST_METHOD_MEDIAN
Definition: fastcluster.h:74
@ HCLUST_METHOD_SINGLE
Definition: fastcluster.h:68