planc
Parallel Lowrank Approximation with Non-negativity Constraints
utils.hpp
Go to the documentation of this file.
1 /* Copyright 2016 Ramakrishnan Kannan */
2 // utility functions
3 #ifndef COMMON_UTILS_HPP_
4 #define COMMON_UTILS_HPP_
5 #include <assert.h>
6 #include <omp.h>
7 #include <stdio.h>
8 #include <chrono>
9 #include <ctime>
10 #include <stack>
11 #include <typeinfo>
12 #include <vector>
13 #include "common/utils.h"
14 #ifdef MKL_FOUND
15 #include <mkl.h>
16 #else
17 #include <cblas.h>
18 #endif
19 
20 static ULONG powersof10[16] = {1,
21  10,
22  100,
23  1000,
24  10000,
25  100000,
26  1000000,
27  10000000,
28  100000000,
29  1000000000,
30  10000000000,
31  100000000000,
32  1000000000000,
33  10000000000000,
34  100000000000000,
35  1000000000000000};
36 
37 static std::stack<std::chrono::steady_clock::time_point> tictoc_stack;
38 static std::stack<double> tictoc_stack_omp_clock;
39 
40 
42 inline void tic() { tictoc_stack.push(std::chrono::steady_clock::now()); }
43 
44 /***
45  * Returns the time taken between the most recent tic() to itself.
46  * @return time in seconds.
47 */
48 inline double toc() {
49  std::chrono::duration<double> time_span =
50  std::chrono::duration_cast<std::chrono::duration<double>>(
51  std::chrono::steady_clock::now() - tictoc_stack.top());
52  double rc = time_span.count();
53  tictoc_stack.pop();
54  return rc;
55 }
56 
57 template <class T>
58 void fixNumericalError(T *X, const double prec = EPSILON_1EMINUS16) {
59  (*X).for_each(
60  [&](typename T::elem_type &val) { val = (val < prec) ? prec : val; });
61 }
62 
63 template <class T>
64 void fixDecimalPlaces(T *X, const int places = NUMBEROF_DECIMAL_PLACES) {
65  (*X).for_each([&](typename T::elem_type &val) {
66  val = floorf(val * powersof10[places]) / powersof10[places];
67  });
68 }
69 
70 /*
71  * Returns the nth prime number.
72  * There are totally 10000 prime numbers within 104000;
73  */
74 int random_sieve(const int nthprime) {
75  int i, m, k;
76  int klimit, nlimit;
77  int *mark;
78 
79  nlimit = 104000;
80 
81  mark = reinterpret_cast<int *>(calloc(nlimit, sizeof(int)));
82 
83  /* Calculate limit for k */
84  klimit = static_cast<int>(sqrt(static_cast<double>(nlimit) + 1));
85 
86  /* Mark the composites */
87  /* Special case */
88  mark[1] = -1;
89 
90  /* Set k=1. Loop until k >= sqrt(n) */
91  for (k = 3; k <= klimit; k = m) {
92  /* Find first non-composite in list > k */
93  for (m = k + 1; m < nlimit; m++)
94  if (!mark[m]) break;
95 
96  /* Mark the numbers 2m, 3m, 4m, ... */
97  for (i = m * 2; i < nlimit; i += m) mark[i] = -1;
98  }
99 
100  /* Now display results - all unmarked numbers are prime */
101  int rcprime = -1;
102  for (k = 0, i = 1; i < nlimit; i++) {
103  if (!mark[i]) {
104  k++;
105  if (k == nthprime + 1) {
106  rcprime = i;
107  break;
108  }
109  }
110  }
111  free(mark);
112  return rcprime;
113 }
114 
115 template <class T>
116 void absmat(T *X) {
117  UVEC negativeIdx = find((*X) < 0);
118  (*X)(negativeIdx) = (*X)(negativeIdx) * -1;
119 }
120 
121 template <class T>
122 void makeSparse(const double sparsity, T(*X)) {
123  // make a matrix sparse
124  srand(RAND_SEED_SPARSE);
125 #pragma omp parallel for
126  for (int j = 0; j < X->n_cols; j++) {
127  for (int i = 0; i < X->n_rows; i++) {
128  if (arma::randu() > sparsity) (*X)(i, j) = 0;
129  }
130  }
131 }
132 
133 template <class T>
134 void randNMF(const UWORD m, const UWORD n, const UWORD k, const double sparsity,
135  T *A) {
136 #ifdef BUILD_SPARSE
137  T temp = arma::sprandu<T>(m, n, sparsity);
138  A = &temp;
139 #else
140  srand(RAND_SEED);
141  MAT W = 10 * arma::randu<MAT>(m, k);
142  MAT H = 10 * arma::randu<MAT>(n, k);
143  if (sparsity < 1) {
144  makeSparse<MAT>(sparsity, &W);
145  makeSparse<MAT>(sparsity, &H);
146  }
147  T temp = ceil(W * trans(H));
148  A = &temp;
149 #endif
150 }
151 
152 template <class T>
153 void printVector(const std::vector<T> &x) {
154  for (int i = 0; i < x.size(); i++) {
155  INFO << x[i] << ' ';
156  }
157  INFO << std::endl;
158 }
159 
160 std::vector<std::vector<size_t>> cartesian_product(
161  const std::vector<std::vector<size_t>> &v) {
162  std::vector<std::vector<size_t>> s = {{}};
163  for (auto &u : v) {
164  std::vector<std::vector<size_t>> r;
165  for (auto y : u) {
166  for (auto &x : s) {
167  r.push_back(x);
168  r.back().push_back(y);
169  }
170  }
171  s.swap(r);
172  }
173  return s;
174 }
175 
176 /*
177  * can be called by external people for sparse input matrix.
178  */
179 template <class INPUTTYPE, class LRTYPE>
180 double computeObjectiveError(const INPUTTYPE &A, const LRTYPE &W,
181  const LRTYPE &H) {
182  // 1. over all nnz (a_ij - w_i h_j)^2
183  // 2. over all nnz (w_i h_j)^2
184  // 3. Compute R of W ahd L of H through QR
185  // 4. use sgemm to compute RL
186  // 5. use slange to compute ||RL||_F^2
187  // 6. return nnzsse+nnzwh-||RL||_F^2
188  UWORD k = W.n_cols;
189  UWORD m = A.n_rows;
190  UWORD n = A.n_cols;
191  tic();
192  double nnzsse = 0;
193  double nnzwh = 0;
194  LRTYPE Rw(k, k);
195  LRTYPE Rh(k, k);
196  LRTYPE Qw(m, k);
197  LRTYPE Qh(n, k);
198  LRTYPE RwRh(k, k);
199 #pragma omp parallel for reduction(+ : nnzsse, nnzwh)
200  for (UWORD jj = 1; jj <= A.n_cols; jj++) {
201  UWORD startIdx = A.col_ptrs[jj - 1];
202  UWORD endIdx = A.col_ptrs[jj];
203  UWORD col = jj - 1;
204  double nnzssecol = 0;
205  double nnzwhcol = 0;
206  for (UWORD ii = startIdx; ii < endIdx; ii++) {
207  UWORD row = A.row_indices[ii];
208  double tempsum = 0;
209  for (UWORD kk = 0; kk < k; kk++) {
210  tempsum += (W(row, kk) * H(col, kk));
211  }
212  nnzwhcol += tempsum * tempsum;
213  nnzssecol += (A.values[ii] - tempsum) * (A.values[ii] - tempsum);
214  }
215  nnzsse += nnzssecol;
216  nnzwh += nnzwhcol;
217  }
218  qr_econ(Qw, Rw, W);
219  qr_econ(Qh, Rh, H);
220  RwRh = Rw * Rh.t();
221  double normWH = arma::norm(RwRh, "fro");
222  Rw.clear();
223  Rh.clear();
224  Qw.clear();
225  Qh.clear();
226  RwRh.clear();
227  INFO << "error compute time " << toc() << std::endl;
228  double fastErr = sqrt(nnzsse + (normWH * normWH - nnzwh));
229  return (fastErr);
230 }
231 
232 #if defined(MKL_FOUND) && defined(BUILD_SPARSE)
233 /*
234  * mklMat is csc representation
235  * Bt is the row major order of the arma B matrix
236  * Ct is the row major order of the arma C matrix
237  * Once you receive Ct, transpose again to print
238  * C using arma
239  */
240 void ARMAMKLSCSCMM(const SP_MAT &mklMat, char transa, const MAT &Bt,
241  double *Ct) {
242  MKL_INT m, k, n, nnz;
243  m = static_cast<MKL_INT>(mklMat.n_rows);
244  k = static_cast<MKL_INT>(mklMat.n_cols);
245  n = static_cast<MKL_INT>(Bt.n_rows);
246  // MAT B = B.t();
247  // C = alpha * A * B + beta * C;
248  // mkl_?cscmm - https://software.MKL_INTel.com/en-us/node/468598
249  // char transa = 'N';
250  double alpha = 1.0;
251  double beta = 0.0;
252  char *matdescra = "GUNC";
253  MKL_INT ldb = n;
254  MKL_INT ldc = n;
255  MKL_INT *pntrb = static_cast<MKL_INT *>(mklMat.col_ptrs);
256  MKL_INT *pntre = pntrb + 1;
257  mkl_dcscmm(&transa, &m, &n, &k, &alpha, matdescra, mklMat.values,
258  static_cast<MKL_INT *> mklMat.row_indices, pntrb, pntre,
259  static_cast<double *>(Bt.memptr()), &ldb, &beta, Ct, &ldc);
260 }
261 #endif
262 
263 /*
264  * This is an sgemm wrapper for armadillo matrices
265  * Something is going crazy with armadillo
266  */
267 
268 void cblas_sgemm(const MAT &A, const MAT &B, double *C) {
269  UWORD m = A.n_rows;
270  UWORD n = B.n_cols;
271  UWORD k = A.n_cols;
272  double alpha = 1.0;
273  double beta = 0.0;
274  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
275  A.memptr(), m, B.memptr(), k, beta, C, m);
276 }
277 
278 #endif // COMMON_UTILS_HPP_
void randNMF(const UWORD m, const UWORD n, const UWORD k, const double sparsity, T *A)
Definition: utils.hpp:134
#define NUMBEROF_DECIMAL_PLACES
Definition: utils.h:46
void makeSparse(const double sparsity, T(*X))
Definition: utils.hpp:122
void tic()
start the timer. easy to call as tic(); some code; double t=toc();
Definition: utils.hpp:42
void fixNumericalError(T *X, const double prec=EPSILON_1EMINUS16)
Definition: utils.hpp:58
static std::stack< double > tictoc_stack_omp_clock
Definition: utils.hpp:38
#define EPSILON_1EMINUS16
Definition: utils.h:43
double computeObjectiveError(const INPUTTYPE &A, const LRTYPE &W, const LRTYPE &H)
Definition: utils.hpp:180
std::vector< std::vector< size_t > > cartesian_product(const std::vector< std::vector< size_t >> &v)
Definition: utils.hpp:160
#define UVEC
Definition: utils.h:58
void cblas_sgemm(const MAT &A, const MAT &B, double *C)
Definition: utils.hpp:268
#define SP_MAT
Definition: utils.h:57
static std::stack< std::chrono::steady_clock::time_point > tictoc_stack
Definition: utils.hpp:37
#define INFO
Definition: utils.h:36
void printVector(const std::vector< T > &x)
Definition: utils.hpp:153
static ULONG powersof10[16]
Definition: utils.hpp:20
#define UWORD
Definition: utils.h:60
#define MAT
Definition: utils.h:52
double toc()
Definition: utils.hpp:48
unsigned long ULONG
Definition: utils.h:69
void absmat(T *X)
Definition: utils.hpp:116
#define RAND_SEED
Definition: utils.h:47
#define RAND_SEED_SPARSE
Definition: utils.h:48
int random_sieve(const int nthprime)
Definition: utils.hpp:74
void fixDecimalPlaces(T *X, const int places=NUMBEROF_DECIMAL_PLACES)
Definition: utils.hpp:64