planc
Parallel Low-rank Approximation with Non-negativity Constraints
aunmf.hpp
/* Copyright 2016 Ramakrishnan Kannan */

#ifndef DISTNMF_AUNMF_HPP_
#define DISTNMF_AUNMF_HPP_

#include <mpi.h>
#include <armadillo>
#include <string>
#include <vector>
#include "distnmf/distnmf.hpp"
#include "distnmf/mpicomm.hpp"

namespace planc {

template <class INPUTMATTYPE>
class DistAUNMF : public DistNMF<INPUTMATTYPE> {
  // Needed in derived algorithms to call the BPP routines.
 protected:
  MAT HtH;
  MAT WtW;
  MAT AHtij;
  MAT WtAij;
  MAT Wt;
  MAT Ht;

  virtual void updateW() = 0;
  virtual void updateH() = 0;

 private:
  // Things needed while solving for W
  MAT localHtH;
  MAT Hjt, Hj;
  MAT AijHj, AijHjt;
  INPUTMATTYPE A_ij_t;
  // Things needed while solving for H
  MAT localWtW;
  MAT Wit, Wi;
  MAT WitAij, AijWit;

  // needed for error computation
  MAT prevH;    // used for error computation
  MAT prevHtH;  // used for error computation
  MAT WtAijH;
  MAT localWtAijH;
  MAT errMtx;
  MAT A_errMtx;

  // needed for block implementation to save memory
  MAT Ht_blk;
  MAT AHtij_blk;
  MAT Wt_blk;
  MAT WtAij_blk;

  std::vector<int> recvWtAsize;
  std::vector<int> recvAHsize;

  int num_k_blocks;
  int perk;

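  // A rough sketch of the data layout assumed by the buffers above and by
  // the allocations in allocateMatrices() (p = MPI_SIZE, perk = k / num_k_blocks):
  //   A       : local m x n block of the globalm() x globaln() input
  //   Wt, Ht  : k x (globalm()/p) and k x (globaln()/p) slices of W^T and H^T
  //   Wit     : perk x m piece of W^T gathered over commSubs()[1]
  //   Hjt     : perk x n piece of H^T gathered over commSubs()[0]
  //   WtAij   : k x (globaln()/p) slice of W^T * A after the reduce_scatter
  //   AHtij   : k x (globalm()/p) slice of (A * H)^T after the reduce_scatter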
  void allocateMatrices() {
    // Collective-call-related initializations.
    // These initializations are for solving W.
    DISTPRINTINFO("k::" << this->k << "::perk::" << this->perk
                  << "::localm::" << this->m << "::localn::" << this->n
                  << "::globalm::" << this->globalm() << "::globaln::"
                  << this->globaln() << "::MPI_SIZE::" << MPI_SIZE);
    HtH.zeros(this->k, this->k);
    localHtH.zeros(this->k, this->k);
    Hj.zeros(this->n, this->perk);
    Hjt.zeros(this->perk, this->n);
    AijHj.zeros(this->m, this->perk);
    AijHjt.zeros(this->perk, this->m);
    AHtij.zeros(this->k, this->globalm() / MPI_SIZE);
    this->recvAHsize.resize(NUMCOLPROCS);
    int fillsize = this->perk * (this->globalm() / MPI_SIZE);
    fillVector<int>(fillsize, &recvAHsize);
#ifdef MPI_VERBOSE
    if (ISROOT) {
      INFO << "::recvAHsize::";
      printVector<int>(recvAHsize);
    }
#endif
    // Allocated for the block implementation.
    Ht_blk.zeros(this->perk, (this->globalm() / MPI_SIZE));
    AHtij_blk.zeros(this->perk, this->globalm() / MPI_SIZE);

    // These initializations are for solving H.
    Wt.zeros(this->k, this->globalm() / MPI_SIZE);
    WtW.zeros(this->k, this->k);
    localWtW.zeros(this->k, this->k);
    Wi.zeros(this->m, this->perk);
    Wit.zeros(this->perk, this->m);
    WitAij.zeros(this->perk, this->n);
    AijWit.zeros(this->n, this->perk);
    WtAij.zeros(this->k, this->globaln() / MPI_SIZE);
    this->recvWtAsize.resize(NUMROWPROCS);
    fillsize = this->perk * (this->globaln() / MPI_SIZE);
    fillVector<int>(fillsize, &recvWtAsize);

    // Allocated for the block implementation.
    Wt_blk.zeros(this->perk, this->globalm() / MPI_SIZE);
    WtAij_blk.zeros(this->perk, this->globaln() / MPI_SIZE);
#ifdef MPI_VERBOSE
    if (ISROOT) {
      INFO << "::recvWtAsize::";
      printVector<int>(recvWtAsize);
    }
#endif
#ifndef BUILD_SPARSE
    if (this->is_compute_error()) {
      errMtx.zeros(this->m, this->n);
      A_errMtx.zeros(this->m, this->n);
    }
#endif
  }

  void freeMatrices() {
    HtH.clear();
    localHtH.clear();
    Hj.clear();
    Hjt.clear();
    AijHj.clear();
    AijHjt.clear();
    AHtij.clear();
    Wt.clear();
    WtW.clear();
    localWtW.clear();
    Wi.clear();
    Wit.clear();
    WitAij.clear();
    AijWit.clear();
    WtAij.clear();
    A_ij_t.clear();
    if (this->is_compute_error()) {
      prevH.clear();
      prevHtH.clear();
      WtAijH.clear();
      localWtAijH.clear();
    }
    Ht_blk.clear();
    AHtij_blk.clear();
    Wt_blk.clear();
    WtAij_blk.clear();
    if (this->is_compute_error()) {
      errMtx.clear();
      A_errMtx.clear();
    }
  }

 public:
  /**
   * Public constructor with the local input matrix, the local low-rank
   * factors, and the MPI communicator.
   */
  DistAUNMF(const INPUTMATTYPE &input, const MAT &leftlowrankfactor,
            const MAT &rightlowrankfactor, const MPICommunicator &communicator,
            const int numkblks)
      : DistNMF<INPUTMATTYPE>(input, leftlowrankfactor, rightlowrankfactor,
                              communicator) {
    num_k_blocks = numkblks;
    perk = this->k / num_k_blocks;
    allocateMatrices();
    this->Wt = leftlowrankfactor.t();
    this->Ht = rightlowrankfactor.t();
    A_ij_t = input.t();
    PRINTROOT("aunmf()::constructor successful");
  }
  ~DistAUNMF() {
    // freeMatrices();
  }

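  /*
   * A minimal sketch (not part of this header) of how a derived algorithm
   * plugs into this class. The class name and solver mentioned below are
   * hypothetical: the base class fills WtW/WtAij and HtH/AHtij, and the
   * derived updateH()/updateW() only solve the local problems and keep
   * H/Ht (resp. W/Wt) consistent.
   *
   *   template <class INPUTMATTYPE>
   *   class DistMyANLS : public DistAUNMF<INPUTMATTYPE> {
   *    public:
   *     using DistAUNMF<INPUTMATTYPE>::DistAUNMF;
   *    protected:
   *     void updateH() {
   *       // e.g. run a BPP/NNLS solve for Ht using the Gram matrix WtW
   *       // and the right-hand side WtAij, then keep H in sync:
   *       // this->H = this->Ht.t();
   *     }
   *     void updateW() {
   *       // e.g. run a BPP/NNLS solve for Wt using HtH and AHtij, then
   *       // this->W = this->Wt.t();
   *     }
   *   };
   */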
  /**
   * This is a matrix multiplication routine based on reduce_scatter.
   * Computes the k x (globaln/p) matrix WtAij = W^T * A one perk-row
   * block at a time.
   */
  void distWtA() {
    for (int i = 0; i < num_k_blocks; i++) {
      int start_row = i * perk;
      int end_row = (i + 1) * perk - 1;
      Wt_blk = Wt.rows(start_row, end_row);
      distWtABlock();
      WtAij.rows(start_row, end_row) = WtAij_blk;
    }
  }
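  // distWtABlock() computes one perk-row block of W^T*A in three steps:
  //   1. allgather Wt_blk into Wit over commSubs()[1]
  //      (or a Pacoss expand when USE_PACOSS is defined),
  //   2. local multiply WitAij = Wit * A,
  //   3. reduce_scatter WitAij into WtAij_blk over commSubs()[0]
  //      (or a Pacoss fold), summing the partial products.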
  void distWtABlock() {
#ifdef USE_PACOSS
    // Perform expand communication using Pacoss.
    memcpy(Wit.memptr(), Wt_blk.memptr(),
           Wt_blk.n_rows * Wt_blk.n_cols * sizeof(Wt_blk[0]));
    MPITIC;
    this->m_rowcomm->expCommBegin(Wit.memptr(), this->perk);
    this->m_rowcomm->expCommFinish(Wit.memptr(), this->perk);
#else
    int sendcnt = (this->globalm() / MPI_SIZE) * this->perk;
    int recvcnt = (this->globalm() / MPI_SIZE) * this->perk;
    Wit.zeros();
    MPITIC;  // allgather WtA
    MPI_Allgather(Wt_blk.memptr(), sendcnt, MPI_DOUBLE, Wit.memptr(), recvcnt,
                  MPI_DOUBLE, this->m_mpicomm.commSubs()[1]);
#endif
    double temp = MPITOC;  // allgather WtA
    PRINTROOT("n::" << this->n << "::k::" << this->k << PRINTMATINFO(Wt)
                    << PRINTMATINFO(Wit));
#ifdef MPI_VERBOSE
    DISTPRINTINFO(PRINTMAT(Wt_blk));
    DISTPRINTINFO(PRINTMAT(Wit));
#endif
    this->time_stats.communication_duration(temp);
    this->time_stats.allgather_duration(temp);
    MPITIC;  // mm WtA
    this->WitAij = this->Wit * this->A;
    // #if defined(MKL_FOUND) && defined(BUILD_SPARSE)
    //   // void ARMAMKLSCSCMM(const SRC &mklMat, const DESTN &Bt,
    //   //                    const char transa, DESTN *Ct)
    //   ARMAMKLSCSCMM(this->A_ij_t, 'N', this->Wit, this->AijWit.memptr());
    // #ifdef MPI_VERBOSE
    //   DISTPRINTINFO(PRINTMAT(this->AijWit));
    // #endif
    //   this->WitAij = reshape(this->AijWit, this->k, this->n);
    // #else
    //   this->WitAij = this->Wit * this->A;
    // #endif
    temp = MPITOC;  // mm WtA
#ifdef MPI_VERBOSE
    DISTPRINTINFO(PRINTMAT(this->WitAij));
#endif
    PRINTROOT(PRINTMATINFO(this->A)
              << PRINTMATINFO(this->Wit) << PRINTMATINFO(this->WitAij));
    this->time_stats.compute_duration(temp);
    this->time_stats.mm_duration(temp);
    this->reportTime(temp, "WtA::");
#ifdef USE_PACOSS
    // Perform fold communication using Pacoss.
    MPITIC;
    this->m_colcomm->foldCommBegin(WitAij.memptr(), this->perk);
    this->m_colcomm->foldCommFinish(WitAij.memptr(), this->perk);
    temp = MPITOC;
    memcpy(WtAij_blk.memptr(), WitAij.memptr(),
           WtAij_blk.n_rows * WtAij_blk.n_cols * sizeof(WtAij_blk[0]));
#else
    WtAij_blk.zeros();
    MPITIC;  // reduce_scatter WtA
    MPI_Reduce_scatter(this->WitAij.memptr(), this->WtAij_blk.memptr(),
                       &(this->recvWtAsize[0]), MPI_DOUBLE, MPI_SUM,
                       this->m_mpicomm.commSubs()[0]);
    temp = MPITOC;  // reduce_scatter WtA
#endif
    this->time_stats.communication_duration(temp);
    this->time_stats.reducescatter_duration(temp);
  }
  /**
   * There are pr x pc processes in total. Computes the k x (globalm/p)
   * matrix AHtij = (A*H)^T one perk-row block at a time.
   */
  void distAH() {
    for (int i = 0; i < num_k_blocks; i++) {
      int start_row = i * perk;
      int end_row = (i + 1) * perk - 1;
      Ht_blk = Ht.rows(start_row, end_row);
      distAHBlock();
      AHtij.rows(start_row, end_row) = AHtij_blk;
    }
  }
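  // distAHBlock() mirrors distWtABlock() for one perk-row block of (A*H)^T:
  //   1. allgather Ht_blk into Hjt over commSubs()[0] (or a Pacoss expand),
  //   2. local multiply AijHjt = Hjt * A_ij_t,
  //   3. reduce_scatter AijHjt into AHtij_blk over commSubs()[1]
  //      (or a Pacoss fold), summing the partial products.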
  void distAHBlock() {
    /*
    DISTPRINTINFO("distAH::" << "::Acolst::" \
                  << Acolst.n_rows << "x" << Acolst.n_cols \
                  << "::norm::" << arma::norm(Acolst, "fro"));
    DISTPRINTINFO("distAH::" << "::H::" \
                  << this->H.n_rows << "x" << this->H.n_cols);
    */
#ifdef USE_PACOSS
    // Perform expand communication using Pacoss.
    memcpy(Hjt.memptr(), Ht_blk.memptr(),
           Ht_blk.n_rows * Ht_blk.n_cols * sizeof(Ht_blk[0]));
    MPITIC;
    this->m_colcomm->expCommBegin(Hjt.memptr(), this->perk);
    this->m_colcomm->expCommFinish(Hjt.memptr(), this->perk);
#else
    int sendcnt = (this->globaln() / MPI_SIZE) * this->perk;
    int recvcnt = (this->globaln() / MPI_SIZE) * this->perk;
    Hjt.zeros();
    MPITIC;  // allgather AH
    MPI_Allgather(this->Ht_blk.memptr(), sendcnt, MPI_DOUBLE,
                  this->Hjt.memptr(), recvcnt, MPI_DOUBLE,
                  this->m_mpicomm.commSubs()[0]);
#endif
    PRINTROOT("n::" << this->n << "::k::" << this->k << PRINTMATINFO(Ht)
                    << PRINTMATINFO(Hjt));
    double temp = MPITOC;  // allgather AH
#ifdef MPI_VERBOSE
    DISTPRINTINFO(PRINTMAT(Ht_blk));
    DISTPRINTINFO(PRINTMAT(Hjt));
    DISTPRINTINFO(PRINTMAT(this->A_ij_t));
#endif
    this->time_stats.communication_duration(temp);
    this->time_stats.allgather_duration(temp);
    MPITIC;  // mm AH
    this->AijHjt = this->Hjt * this->A_ij_t;
    // #if defined(MKL_FOUND) && defined(BUILD_SPARSE)
    //   // void ARMAMKLSCSCMM(const SRC &mklMat, const DESTN &Bt,
    //   //                    const char transa, DESTN *Ct)
    //   ARMAMKLSCSCMM(this->A, 'N', this->Hjt, this->AijHj.memptr());
    // #ifdef MPI_VERBOSE
    //   DISTPRINTINFO(PRINTMAT(this->AijHj));
    // #endif
    //   this->AijHjt = reshape(this->AijHj, this->k, this->m);
    // #else
    //   this->AijHjt = this->Hjt * this->A_ij_t;
    // #endif
    // #ifdef MPI_VERBOSE
    //   DISTPRINTINFO(PRINTMAT(this->AijHjt));
    // #endif
    temp = MPITOC;  // mm AH
    PRINTROOT(PRINTMATINFO(this->A_ij_t)
              << PRINTMATINFO(this->Hjt) << PRINTMATINFO(this->AijHjt));
    this->time_stats.compute_duration(temp);
    this->time_stats.mm_duration(temp);
    this->reportTime(temp, "AH::");
#ifdef USE_PACOSS
    // Perform fold communication using Pacoss.
    MPITIC;
    this->m_rowcomm->foldCommBegin(AijHjt.memptr(), this->perk);
    this->m_rowcomm->foldCommFinish(AijHjt.memptr(), this->perk);
    temp = MPITOC;
    memcpy(AHtij_blk.memptr(), AijHjt.memptr(),
           AHtij_blk.n_rows * AHtij_blk.n_cols * sizeof(AHtij_blk[0]));
#else
    AHtij_blk.zeros();
    MPITIC;  // reduce_scatter AH
    MPI_Reduce_scatter(this->AijHjt.memptr(), this->AHtij_blk.memptr(),
                       &(this->recvAHsize[0]), MPI_DOUBLE, MPI_SUM,
                       this->m_mpicomm.commSubs()[1]);
    temp = MPITOC;  // reduce_scatter AH
#endif
    this->time_stats.communication_duration(temp);
    this->time_stats.reducescatter_duration(temp);
  }
  /**
   * There are p processes. Each process computes the k x k Gram matrix of
   * its local piece of X, and the local results are summed into XtX with
   * an allreduce.
   */
  void distInnerProduct(const MAT &X, MAT *XtX) {
    // each process computes its own kxk matrix
    MPITIC;  // gram
    localWtW = X.t() * X;
#ifdef MPI_VERBOSE
    DISTPRINTINFO("W::" << norm(X, "fro")
                        << "::localWtW::" << norm(this->localWtW, "fro"));
#endif
    double temp = MPITOC;  // gram
    this->time_stats.compute_duration(temp);
    this->time_stats.gram_duration(temp);
    (*XtX).zeros();
    if (X.n_rows == this->m) {
      this->reportTime(temp, "Gram::W::");
    } else {
      this->reportTime(temp, "Gram::H::");
    }
    MPITIC;  // allreduce gram
    MPI_Allreduce(localWtW.memptr(), (*XtX).memptr(), this->k * this->k,
                  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    temp = MPITOC;  // allreduce gram
    this->time_stats.communication_duration(temp);
    this->time_stats.allreduce_duration(temp);
  }
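  // Note: distInnerProduct() relies on the local pieces of X (the rows of W
  // or H owned by each process) partitioning the global factor, so that the
  // sum of the local X^T X over MPI_COMM_WORLD is the full k x k Gram matrix,
  // which the allreduce leaves on every process.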
  /**
   * This is the main loop function.
   * Refer to Algorithm 1 on page 3 of the PPoPP HPC-NMF paper.
   */
  void computeNMF() {
    PRINTROOT("computeNMF started");

#ifdef MPI_VERBOSE
    DISTPRINTINFO(PRINTMAT(this->A));
#endif
    // error computation
    if (this->is_compute_error()) {
      prevH.zeros(size(this->H));
      prevHtH.zeros(this->k, this->k);
      WtAijH.zeros(this->k, this->k);
      localWtAijH.zeros(this->k, this->k);
    }
#ifdef __WITH__BARRIER__TIMING__
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    for (unsigned int iter = 0; iter < this->num_iterations(); iter++) {
      // Save the current factor for error computation.
      if (iter > 0 && this->is_compute_error()) {
        this->prevH = this->H;
        this->prevHtH = this->HtH;
      }
      MPITIC;  // total_d W&H
      // Update H given WtW and WtA (step 4 of the algorithm).
      {
        // compute WtW
        this->distInnerProduct(this->W, &this->WtW);
        PRINTROOT(PRINTMATINFO(this->WtW));
        this->applyReg(this->regH(), &this->WtW);
#ifdef MPI_VERBOSE
        PRINTROOT(PRINTMAT(this->WtW));
#endif
        // compute WtA
        this->distWtA();
        // PRINTROOT(PRINTMATINFO(this->WtAij));
#ifdef MPI_VERBOSE
        DISTPRINTINFO(PRINTMAT(this->WtAij));
#endif
        MPITIC;  // nnls H
        // Ensure both Ht and H are consistent after the update;
        // some algorithms compute Ht and some compute H.
        updateH();
#ifdef MPI_VERBOSE
        DISTPRINTINFO("::it=" << iter << PRINTMAT(this->H));
#endif
        double temp = MPITOC;  // nnls H
        this->time_stats.compute_duration(temp);
        this->time_stats.nnls_duration(temp);
        this->reportTime(temp, "NNLS::H::");
      }
      // Update W given HtH and AH (step 3 of the algorithm).
      {
        // compute HtH
        this->distInnerProduct(this->H, &this->HtH);
        PRINTROOT("HtH::" << PRINTMATINFO(this->HtH));
        this->applyReg(this->regW(), &this->HtH);
#ifdef MPI_VERBOSE
        PRINTROOT(PRINTMAT(this->HtH));
#endif
        // compute AH
        this->distAH();
        // PRINTROOT(PRINTMATINFO(this->AHtij));
#ifdef MPI_VERBOSE
        DISTPRINTINFO(PRINTMAT(this->AHtij));
#endif
        MPITIC;  // nnls W
        // Ensure both W and Wt are consistent after the update;
        // some algorithms compute W and some compute Wt.
        updateW();
#ifdef MPI_VERBOSE
        DISTPRINTINFO("::it=" << iter << PRINTMAT(this->W));
#endif
        double temp = MPITOC;  // nnls W
        this->time_stats.compute_duration(temp);
        this->time_stats.nnls_duration(temp);
        this->reportTime(temp, "NNLS::W::");
      }
      this->time_stats.duration(MPITOC);  // total_d W&H
      if (iter > 0 && this->is_compute_error()) {
#ifdef BUILD_SPARSE
        this->computeError(iter);
#else
        this->computeError2(iter);
#endif

        PRINTROOT("it=" << iter << "::algo::" << this->m_algorithm << "::k::"
                        << this->k << "::err::" << sqrt(this->objective_err)
                        << "::relerr::"
                        << sqrt(this->objective_err / this->m_globalsqnormA));
      }
      PRINTROOT("completed it=" << iter
                                << "::taken::" << this->time_stats.duration());
    }  // end for loop
    MPI_Barrier(MPI_COMM_WORLD);
    this->reportTime(this->time_stats.duration(), "total_d");
    this->reportTime(this->time_stats.communication_duration(), "total_comm");
    this->reportTime(this->time_stats.compute_duration(), "total_comp");
    this->reportTime(this->time_stats.allgather_duration(), "total_allgather");
    this->reportTime(this->time_stats.allreduce_duration(), "total_allreduce");
    this->reportTime(this->time_stats.reducescatter_duration(),
                     "total_reducescatter");
    this->reportTime(this->time_stats.gram_duration(), "total_gram");
    this->reportTime(this->time_stats.mm_duration(), "total_mm");
    this->reportTime(this->time_stats.nnls_duration(), "total_nnls");
    if (this->is_compute_error()) {
      this->reportTime(this->time_stats.err_compute_duration(),
                       "total_err_compute");
      this->reportTime(this->time_stats.err_communication_duration(),
                       "total_err_communication");
    }
  }

  /**
   * Error computation based on the trace identity. We assume this error
   * function will be called in every iteration, before updating the block,
   * so that the error of the previous iteration can be computed from prevH
   * and prevHtH.
   */
  void computeError(const int it) {
    MPITIC;  // compute error
    this->localWtAijH = this->WtAij * this->prevH;
#ifdef MPI_VERBOSE
    DISTPRINTINFO("::it=" << it << PRINTMAT(this->WtAij));
    DISTPRINTINFO("::it=" << it << PRINTMAT(this->localWtAijH));
    DISTPRINTINFO("::it=" << it << PRINTMAT(this->prevH));
    PRINTROOT("::it=" << it << PRINTMAT(this->WtW));
    PRINTROOT("::it=" << it << PRINTMAT(this->prevHtH));
#endif
    double temp = MPITOC;  // compute error
    this->time_stats.err_compute_duration(temp);
    MPITIC;  // communication error
    MPI_Allreduce(this->localWtAijH.memptr(), this->WtAijH.memptr(),
                  this->k * this->k, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    temp = MPITOC;  // communication error
#ifdef MPI_VERBOSE
    DISTPRINTINFO(PRINTMAT(WtAijH));
#endif
    this->time_stats.err_communication_duration(temp);
    double tWtAijH = trace(this->WtAijH);
    double tWtWHtH = trace(this->WtW * this->prevHtH);
    PRINTROOT("::it=" << it << "normA::" << this->m_globalsqnormA
                      << "::tWtAijH::" << 2 * tWtAijH
                      << "::tWtWHtH::" << tWtWHtH);
    this->objective_err = this->m_globalsqnormA - 2 * tWtAijH + tWtWHtH;
  }
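  // The identity used by computeError() above (with A ~ W*H^T):
  //   ||A - W*H^T||_F^2 = ||A||_F^2 - 2*trace(W^T*A*H) + trace((W^T*W)*(H^T*H)),
  // where trace(W^T*A*H) comes from the k x k matrix WtAijH (allreduced from
  // the local products WtAij * prevH) and the last term uses the k x k Gram
  // matrices WtW and prevHtH.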
  /*
   * Compute the error the old-fashioned way, by explicitly forming the
   * local approximation Wi * Hjt and taking the squared Frobenius norm
   * of the residual.
   */
  void computeError2(const int it) {
    double local_sqerror = 0.0;
    PRINTROOT("::it=" << it << "::Calling compute error 2");
    MPITIC;
    // DISTPRINTINFO("::norm(Wi,fro)::" << norm(this->Wit, "fro") <<
    //               "::norm(Hjt, fro)::" << norm(this->Hjt, "fro"));
    this->Wi = this->Wit.t();
    errMtx = this->Wi * this->Hjt;
    A_errMtx = this->A - errMtx;
    local_sqerror = norm(A_errMtx, "fro");
    local_sqerror *= local_sqerror;
    double temp = MPITOC;
    this->time_stats.err_compute_duration(temp);
    // DISTPRINTINFO("::it=" << it << "::local_sqerror::" << local_sqerror);
    MPITIC;
    MPI_Allreduce(&local_sqerror, &this->objective_err, 1, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD);
    temp = MPITOC;
    this->time_stats.err_communication_duration(temp);
  }
};

}  // namespace planc

#endif  // DISTNMF_AUNMF_HPP_