Static Value-Flow Analysis
Loading...
Searching...
No Matches
fastcluster.cpp
Go to the documentation of this file.
1//
2// C++ standalone version of fastcluster by Daniel Müllner
3//
4// Copyright: Christoph Dalitz, 2020
5// Daniel Müllner, 2011
6// License: BSD style license
7// (see the file LICENSE for details)
8//
9
10
11#include <assert.h>
12#include <vector>
13#include <algorithm>
14
16
17// Code by Daniel Müllner
18// workaround to make it usable as a standalone version (without R)
19bool fc_isnan(double x)
20{
21 return false;
22}
25
26//
27// Assigns cluster labels (0, ..., nclust-1) to the n points such
28// that the cluster result is split into nclust clusters.
29//
30// Input arguments:
31// n = number of observables
32// merge = clustering result in R format
33// nclust = number of clusters
34// Output arguments:
35// labels = allocated integer array of size n for result
36//
37void cutree_k(int n, const int* merge, int nclust, int* labels)
38{
39
40 int k,m1,m2,j,l;
41
42 if (nclust > n || nclust < 2)
43 {
44 for (j=0; j<n; j++) labels[j] = 0;
45 return;
46 }
47
48 // assign to each observable the number of its last merge step
49 // beware: indices of observables in merge start at 1 (R convention)
50 std::vector<int> last_merge(n, 0);
51 for (k=1; k<=(n-nclust); k++)
52 {
53 // (m1,m2) = merge[k,]
54 m1 = merge[k-1];
55 m2 = merge[n-1+k-1];
56 if (m1 < 0 && m2 < 0) // both single observables
57 {
58 last_merge[-m1-1] = last_merge[-m2-1] = k;
59 }
60 else if (m1 < 0 || m2 < 0) // one is a cluster
61 {
62 if(m1 < 0)
63 {
64 j = -m1;
65 m1 = m2;
66 }
67 else j = -m2;
68 // merging single observable and cluster
69 for(l = 0; l < n; l++)
70 if (last_merge[l] == m1)
71 last_merge[l] = k;
72 last_merge[j-1] = k;
73 }
74 else // both cluster
75 {
76 for(l=0; l < n; l++)
77 {
78 if( last_merge[l] == m1 || last_merge[l] == m2 )
79 last_merge[l] = k;
80 }
81 }
82 }
83
84 // assign cluster labels
85 int label = 0;
86 std::vector<int> z(n,-1);
87 for (j=0; j<n; j++)
88 {
89 if (last_merge[j] == 0) // still singleton
90 {
91 labels[j] = label++;
92 }
93 else
94 {
95 if (z[last_merge[j]] < 0)
96 {
97 z[last_merge[j]] = label++;
98 }
99 labels[j] = z[last_merge[j]];
100 }
101 }
102}
103
104//
105// Assigns cluster labels (0, ..., nclust-1) to the n points such
106// that the hierarchical clustering is stopped when cluster distance >= cdist
107//
108// Input arguments:
109// n = number of observables
110// merge = clustering result in R format
111// height = cluster distance at each merge step
112// cdist = cutoff cluster distance
113// Output arguments:
114// labels = allocated integer array of size n for result
115//
116void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels)
117{
118
119 int k;
120
121 for (k=0; k<(n-1); k++)
122 {
123 if (height[k] >= cdist)
124 {
125 break;
126 }
127 }
128 cutree_k(n, merge, n-k, labels);
129}
130
131
132//
133// Hierarchical clustering with one of Daniel Muellner's fast algorithms
134//
135// Input arguments:
136// n = number of observables
137// distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
138// the upper triangle (without diagonal elements) of the distance
139// matrix, e.g. for n=4:
140// d00 d01 d02 d03
141// d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
142// d20 d21 d22 d23
143// d30 d31 d32 d33
144// method = cluster metric (see enum hclust_fast_methods)
145// Output arguments:
146// merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
147// Result follows R hclust convention:
148// - observable indices start with one
149// - merge[i][] contains the merged nodes in step i
150// - merge[i][j] is negative when the node is an atom
151// height = allocated (n-1) array with distances at each merge step
152// Return code:
153// 0 = ok
154// 1 = invalid method
155//
156int hclust_fast(int n, double* distmat, int method, int* merge, double* height)
157{
158
159 // call appropriate clustering function
160 cluster_result Z2(n-1);
161 if (method == HCLUST_METHOD_SINGLE)
162 {
163 // single link
164 MST_linkage_core(n, distmat, Z2);
165 }
166 else if (method == HCLUST_METHOD_COMPLETE)
167 {
168 // complete link
169 NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
170 }
171 else if (method == HCLUST_METHOD_AVERAGE)
172 {
173 // best average distance
174 double* members = new double[n];
175 for (int i=0; i<n; i++) members[i] = 1;
176 NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
177 delete[] members;
178 }
179 else if (method == HCLUST_METHOD_MEDIAN)
180 {
181 // best median distance (beware: O(n^3))
182 generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
183 }
184 else
185 {
186 return 1;
187 }
188
189 int* order = new int[n];
190 if (method == HCLUST_METHOD_MEDIAN)
191 {
192 generate_R_dendrogram<true>(merge, height, order, Z2, n);
193 }
194 else
195 {
196 generate_R_dendrogram<false>(merge, height, order, Z2, n);
197 }
198
199 delete[] order; // only needed for visualization
200
201 return 0;
202}
cJSON * n
Definition cJSON.cpp:2558
#define NULL
Definition extapi.c:2
void cutree_k(int n, const int *merge, int nclust, int *labels)
bool fc_isnan(double x)
void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
@ HCLUST_METHOD_AVERAGE
Definition fastcluster.h:72
@ HCLUST_METHOD_COMPLETE
Definition fastcluster.h:70
@ HCLUST_METHOD_MEDIAN
Definition fastcluster.h:74
@ HCLUST_METHOD_SINGLE
Definition fastcluster.h:68