de/d5d/nmf_8hpp_source.html

 /* Copyright 2016 Ramakrishnan Kannan */
 #ifndef COMMON_NMF_HPP_
 #define COMMON_NMF_HPP_
 #include <assert.h>
 #include <string>
 #include "common/utils.hpp"

 // #ifndef _VERBOSE
 // #define _VERBOSE 1;
 // #endif

 #define NUM_THREADS 4
 #define CONV_ERR 0.000001
 #define NUM_STATS 9

 // #ifndef COLLECTSTATS
 // #define COLLECTSTATS 1
 // #endif

 namespace planc {

 // T must be a either an instance of MAT or sp_MAT
 template <class T>
 class NMF {
  protected:
   T A;
   MAT W, H;
   MAT Winit, Hinit;
   UINT m, n, k;

   /*
    * Collected statistics are
    * iteration Htime Wtime totaltime normH normW densityH densityW relError
    */
   MAT stats;
   double objective_err;
   double normA, normW, normH;
   double densityW, densityH;
   bool cleared;
   unsigned int m_num_iterations;
   std::string input_file_name;
   MAT errMtx;       // used for error computation.
   T A_err_sub_mtx;  // used for error computation.
   FVEC m_regW;
   FVEC m_regH;

   void collectStats(int iteration) {
     this->normW = arma::norm(this->W, "fro");
     this->normH = arma::norm(this->H, "fro");
     UVEC nnz = find(this->W > 0);
     this->densityW = nnz.size() / (this->m * this->k);
     nnz.clear();
     nnz = find(this->H > 0);
     this->densityH = nnz.size() / (this->m * this->k);
     this->stats(iteration, 4) = this->normH;
     this->stats(iteration, 5) = this->normW;
     this->stats(iteration, 6) = this->densityH;
     this->stats(iteration, 7) = this->densityW;
     this->stats(iteration, 8) = this->objective_err;
   }

   void applyReg(const FVEC &reg, MAT *AtA) {
     // Frobenius norm regularization
     if (reg(0) > 0) {
       MAT identity = arma::eye<MAT>(this->k, this->k);
       float lambda_l2 = reg(0);
       (*AtA) = (*AtA) + 2 * lambda_l2 * identity;
     }

     // L1 - norm regularization
     if (reg(1) > 0) {
       MAT onematrix = arma::ones<MAT>(this->k, this->k);
       float lambda_l1 = reg(1);
       (*AtA) = (*AtA) + 2 * lambda_l1 * onematrix;
     }
   }

   void normalize_by_W() {
     MAT W_square = arma::pow(this->W, 2);
     ROWVEC norm2 = arma::sqrt(arma::sum(W_square, 0));
     for (unsigned int i = 0; i < this->k; i++) {
       if (norm2(i) > 0) {
         this->W.col(i) = this->W.col(i) / norm2(i);
         this->H.col(i) = this->H.col(i) * norm2(i);
       }
     }
   }

  private:
   void otherInitializations() {
     this->stats.zeros();
     this->cleared = false;
     this->normA = arma::norm(this->A, "fro");
     this->m_num_iterations = 20;
     this->objective_err = 1000000000000;
     this->stats.resize(m_num_iterations + 1, NUM_STATS);
   }

  public:
   NMF(const T &input, const unsigned int rank) {
     this->A = input;
     this->m = A.n_rows;
     this->n = A.n_cols;
     this->k = rank;
     // prime number closer to W.
     arma::arma_rng::set_seed(89);
     this->W = arma::randu<MAT>(m, k);
     // prime number close to H
     arma::arma_rng::set_seed(73);
     this->H = arma::randu<MAT>(n, k);
     this->m_regW = arma::zeros<FVEC>(2);
     this->m_regH = arma::zeros<FVEC>(2);
     normalize_by_W();

     // make the random MATrix positive
     // absMAT<MAT>(W);
     // absMAT<MAT>(H);
     // other intializations
     this->otherInitializations();
   }
   NMF(const T &input, const MAT &leftlowrankfactor,
       const MAT &rightlowrankfactor) {
     assert(leftlowrankfactor.n_cols == rightlowrankfactor.n_cols);
     this->A = input;
     this->W = leftlowrankfactor;
     this->H = rightlowrankfactor;
     this->Winit = this->W;
     this->Hinit = this->H;
     this->m = A.n_rows;
     this->n = A.n_cols;
     this->k = W.n_cols;
     this->m_regW = arma::zeros<FVEC>(2);
     this->m_regH = arma::zeros<FVEC>(2);

     // other initializations
     this->otherInitializations();
   }

   virtual void computeNMF() = 0;

   MAT getLeftLowRankFactor() { return W; }
   MAT getRightLowRankFactor() { return H; }

   /*
    * A is mxn
    * Wr is mxk will be overwritten. Must be passed with values of W.
    * Hr is nxk will be overwritten. Must be passed with values of H.
    * All MATrices are in row major forMAT
    * ||A-WH||_F^2 = over all nnz (a_ij - w_i h_j)^2 +
    *           over all zeros (w_i h_j)^2
    *         = over all nnz (a_ij - w_i h_j)^2 +
    ||WH||_F^2 - over all nnz (w_i h_j)^2
    *
    */
 #if 0
     void computeObjectiveError() {
         // 1. over all nnz (a_ij - w_i h_j)^2
         // 2. over all nnz (w_i h_j)^2
         // 3. Compute R of W ahd L of H through QR
         // 4. use sgemm to compute RL
         // 5. use slange to compute ||RL||_F^2
         // 6. return nnzsse+nnzwh-||RL||_F^2
         tic();
         float nnzsse = 0;
         float nnzwh  = 0;
         MAT  Rw(this->k, this->k);
         MAT  Rh(this->k, this->k);
         MAT  Qw(this->m, this->k);
         MAT  Qh(this->n, this->k);
         MAT  RwRh(this->k, this->k);

         // #pragma omp parallel for reduction (+ : nnzsse,nnzwh)
         for (UWORD jj = 1; jj <= this->A.n_cols; jj++) {
             UWORD startIdx  = this->A.col_ptrs[jj - 1];
             UWORD endIdx    = this->A.col_ptrs[jj];
             UWORD col       = jj - 1;
             float nnzssecol = 0;
             float nnzwhcol  = 0;

             for (UWORD ii = startIdx; ii < endIdx; ii++) {
                 UWORD row     = this->A.row_indices[ii];
                 float tempsum = 0;

                 for (UWORD kk = 0; kk < k; kk++) {
                     tempsum += (this->W(row, kk) * this->H(col, kk));
                 }
                 nnzwhcol  += tempsum * tempsum;
                 nnzssecol += (this->A.values[ii] - tempsum)
                              * (this->A.values[ii] - tempsum);
             }
             nnzsse += nnzssecol;
             nnzwh  += nnzwhcol;
         }
         qr_econ(Qw, Rw, this->W);
         qr_econ(Qh, Rh, this->H);
         RwRh = Rw * Rh.t();
         float normWH = arma::norm(RwRh, "fro");
         Rw.clear();
         Rh.clear();
         Qw.clear();
         Qh.clear();
         RwRh.clear();
         INFO << "error compute time " << toc() << std::endl;
         float fastErr = sqrt(nnzsse + (normWH * normWH - nnzwh));
         this->objective_err = fastErr;

         // return (fastErr);
     }

 #else  // ifdef BUILD_SPARSE
   void computeObjectiveError() {
     // (init.norm_A)^2 - 2*trace(H'*(A'*W))+trace((W'*W)*(H*H'))
     // MAT WtW = this->W.t() * this->W;
     // MAT HtH = this->H.t() * this->H;
     // MAT AtW = this->A.t() * this->W;

     // double sqnormA  = this->normA * this->normA;
     // double TrHtAtW  = arma::trace(this->H.t() * AtW);
     // double TrWtWHtH = arma::trace(WtW * HtH);

     // this->objective_err = sqnormA - (2 * TrHtAtW) + TrWtWHtH;
 #ifdef _VERBOSE
     INFO << "Entering computeObjectiveError A=" << this->A.n_rows << "x"
          << this->A.n_cols << " W = " << this->W.n_rows << "x" << this->W.n_cols
          << " H=" << this->H.n_rows << "x" << this->H.n_cols << std::endl;
 #endif
     tic();
     // always restrict the errMtx size to fit it in memory
     // and doesn't occupy much space.
     // For eg., the max we can have only 3 x 10^6 elements.
     // The number of columns must be chosen appropriately.
     UWORD PER_SPLIT = std::ceil((3 * 1e6) / A.n_rows);
     // UWORD PER_SPLIT = 1;
     // always colSplit. Row split is really slow as the matrix is col major
     // always
     bool colSplit = true;
     // if (this->A.n_rows > PER_SPLIT || this->A.n_cols > PER_SPLIT) {
     uint numSplits = 1;
     MAT Ht = this->H.t();
     if (this->A.n_cols > PER_SPLIT) {
       // if (this->A.n_cols < this->A.n_rows)
       //     colSplit = false;
       if (colSplit)
         numSplits = A.n_cols / PER_SPLIT;
       else
         numSplits = A.n_rows / PER_SPLIT;
       // #ifdef _VERBOSE
     } else {
       PER_SPLIT = A.n_cols;
       numSplits = 1;
     }
 #ifdef _VERBOSE
     INFO << "PER_SPLIT = " << PER_SPLIT << "numSplits = " << numSplits
          << std::endl;
 #endif
     // #endif
     VEC splitErr = arma::zeros<VEC>(numSplits + 1);
     // allocate one and never allocate again.
     if (colSplit && errMtx.n_rows == 0 && errMtx.n_cols == 0) {
       errMtx = arma::zeros<MAT>(A.n_rows, PER_SPLIT);
       A_err_sub_mtx = arma::zeros<T>(A.n_rows, PER_SPLIT);
     } else {
       errMtx = arma::zeros<MAT>(PER_SPLIT, A.n_cols);
       A_err_sub_mtx = arma::zeros<T>(PER_SPLIT, A.n_cols);
     }
     for (unsigned int i = 0; i <= numSplits; i++) {
       UWORD beginIdx = i * PER_SPLIT;
       UWORD endIdx = (i + 1) * PER_SPLIT - 1;
       if (colSplit) {
         if (endIdx > A.n_cols) endIdx = A.n_cols - 1;
         if (beginIdx < endIdx) {
 #ifdef _VERBOSE
           INFO << "beginIdx=" << beginIdx << " endIdx= " << endIdx << std::endl;
           INFO << "Ht = " << Ht.n_rows << "x" << Ht.n_cols << std::endl;

 #endif
           errMtx = W * Ht.cols(beginIdx, endIdx);
           A_err_sub_mtx = A.cols(beginIdx, endIdx);
         } else if (beginIdx == endIdx && beginIdx < A.n_cols) {
           errMtx = W * Ht.col(beginIdx);
           A_err_sub_mtx = A.col(beginIdx);
         }
       } else {
         if (endIdx > A.n_rows) endIdx = A.n_rows - 1;
 #ifdef _VERBOSE
         INFO << "beginIdx=" << beginIdx << " endIdx= " << endIdx << std::endl;
 #endif
         if (beginIdx < endIdx) {
           A_err_sub_mtx = A.rows(beginIdx, endIdx);
           errMtx = W.rows(beginIdx, endIdx) * Ht;
         }
       }
       A_err_sub_mtx -= errMtx;
       A_err_sub_mtx %= A_err_sub_mtx;
       splitErr(i) = arma::accu(A_err_sub_mtx);
     }
     double err_time = toc();
     INFO << "err compute time::" << err_time << std::endl;
     this->objective_err = arma::sum(splitErr);
   }

 #endif  // ifdef BUILD_SPARSE
   void computeObjectiveError(const T &At, const MAT &WtW, const MAT &HtH) {
     MAT AtW = At * this->W;

     double sqnormA = this->normA * this->normA;
     double TrHtAtW = arma::trace(this->H.t() * AtW);
     double TrWtWHtH = arma::trace(WtW * HtH);

     this->objective_err = sqnormA - (2 * TrHtAtW) + TrWtWHtH;
   }
   void num_iterations(const int it) { this->m_num_iterations = it; }
   void regW(const FVEC &iregW) { this->m_regW = iregW; }
   void regH(const FVEC &iregH) { this->m_regH = iregH; }
   FVEC regW() { return this->m_regW; }
   FVEC regH() { return this->m_regH; }
   const unsigned int num_iterations() const { return m_num_iterations; }

   ~NMF() { clear(); }
   void clear() {
     if (!this->cleared) {
       this->A.clear();
       this->W.clear();
       this->H.clear();
       this->stats.clear();
       if (errMtx.n_rows != 0 && errMtx.n_cols != 0) {
         errMtx.clear();
         A_err_sub_mtx.clear();
       }
       this->cleared = true;
     }
   }
 };
 }  // namespace planc
 #endif  // COMMON_NMF_HPP_
planc::NMF::NMF
NMF(const T &input, const unsigned int rank)
Constructor taking an input matrix and the target low rank.
Definition: nmf.hpp:119

planc::NMF::~NMF
~NMF()
Definition: nmf.hpp:352

planc::NMF::regW
void regW(const FVEC &iregW)
Sets the regularization on left low rank factor W.
Definition: nmf.hpp:342

planc::NMF::computeObjectiveError
void computeObjectiveError(const T &At, const MAT &WtW, const MAT &HtH)
Definition: nmf.hpp:330

tic
void tic()
Start the timer. Typical usage: tic(); some code; double t = toc();
Definition: utils.hpp:42

utils.hpp

planc::NMF::clear
void clear()
Clear the memory for input matrix A, left low rank factor W and right low rank factor H...
Definition: nmf.hpp:355

planc::NMF::num_iterations
const unsigned int num_iterations() const
Returns the number of iterations.
Definition: nmf.hpp:350

FVEC
#define FVEC
Definition: utils.h:55

UVEC
#define UVEC
Definition: utils.h:58

toc
double toc()
Definition: utils.hpp:48

planc::NMF::getRightLowRankFactor
MAT getRightLowRankFactor()
Returns the right low rank factor matrix H.
Definition: nmf.hpp:169

planc::NMF::num_iterations
void num_iterations(const int it)
Sets number of iterations for the NMF algorithms.
Definition: nmf.hpp:340

INFO
#define INFO
Definition: utils.h:36

planc::NMF::regH
FVEC regH()
Returns the L2 and L1 regularization parameters of H as a vector.
Definition: nmf.hpp:348

UWORD
#define UWORD
Definition: utils.h:60

planc::NMF::NMF
NMF(const T &input, const MAT &leftlowrankfactor, const MAT &rightlowrankfactor)
Constructor with initial left and right low rank factors. Necessary when you want to compare algorithm...
Definition: nmf.hpp:146

planc::NMF::computeNMF
virtual void computeNMF()=0

UINT
unsigned int UINT
Definition: utils.h:68

MAT
#define MAT
Definition: utils.h:52

planc::NMF::getLeftLowRankFactor
MAT getLeftLowRankFactor()
Returns the left low rank factor matrix W.
Definition: nmf.hpp:167

NUM_STATS
#define NUM_STATS
Definition: nmf.hpp:14

planc::NMF::regW
FVEC regW()
Returns the L2 and L1 regularization parameters of W as a vector.
Definition: nmf.hpp:346

ROWVEC
#define ROWVEC
Definition: utils.h:54

planc
ncp_factors contains the factors of the NCP; every i-th factor is of size n_i * k; the number of factors is ...
Definition: ncpfactors.hpp:20

VEC
#define VEC
Definition: utils.h:61

planc::NMF::computeObjectiveError
void computeObjectiveError()
Definition: nmf.hpp:238

planc::NMF::regH
void regH(const FVEC &iregH)
Sets the regularization on right low rank H.
Definition: nmf.hpp:344

planc::NMF
Definition: nmf.hpp:24