planc
Parallel Lowrank Approximation with Non-negativity Constraints
distntfnes.hpp
Go to the documentation of this file.
1 /* Copyright Ramakrishnan Kannan 2018 */
2 
3 #ifndef DISTNTF_DISTNTFNES_HPP_
4 #define DISTNTF_DISTNTFNES_HPP_
5 
6 #include "distntf/distauntf.hpp"
7 
8 namespace planc {
9 
10 class DistNTFNES : public DistAUNTF {
11  private:
12  // Update Variables
13  NCPFactors m_prox_t; // Proximal Term (H_*)
14  NCPFactors m_acc_t; // Acceleration term (Y)
15  NCPFactors m_grad_t; // Gradient Term (\nabla_f(Y))
16  NCPFactors m_prev_t; // Previous Iterate
17  MAT modified_gram;
18  // Termination Variables
19  double delta1; // Termination value for absmax
20  double delta2; // Termination value for min
21  // Acceleration Variables
22  int acc_start; // Starting iteration for acceleration
23  int acc_exp; // Step size exponent
24  int acc_fails; // Acceleration Fails
25  int fail_limit; // Acceleration Fail limit
26  const int NUM_INNER_ITERS = 20; // Capping inner iter
27  double eig_time;
28  double stop_iter_time;
29  double proj_time;
30  double norm_time;
31 
32  protected:
33  inline double get_lambda(double L, double mu) {
34  double q = L / mu;
35  double lambda = 0.0;
36 
37  if (q > 1e6)
38  lambda = 10 * mu;
39  else if (q > 1e3)
40  lambda = mu;
41  else
42  lambda = mu / 10;
43 
44  return lambda;
45  }
46 
47  inline double get_alpha(double alpha, double q) {
48  /* Solves the quadratic equation
49  x^2 + (\alpha^2 -q)x - \alpha^2 = 0
50  */
51  double a, b, c, D, x;
52  a = 1.0;
53  b = alpha * alpha - q;
54  c = -alpha * alpha;
55  D = b * b - 4 * a * c;
56  x = (-b + sqrt(D)) / 2;
57  return x;
58  }
59 
60  bool stop_iter(const int mode) {
61  MPITIC;
62  bool stop = false;
63  double local_absmax, local_min, global_absmax, global_min;
64  local_absmax = 0.0;
65  if (m_nls_sizes[mode] > 0) {
66  local_absmax =
67  (arma::abs(m_grad_t.factor(mode) % m_acc_t.factor(mode))).max();
68  }
69  MPI_Allreduce(&local_absmax, &global_absmax, 1, MPI_DOUBLE, MPI_MAX,
70  MPI_COMM_WORLD);
71 
72  local_min = 0.0;
73  if (m_nls_sizes[mode] > 0) {
74  local_min = (m_grad_t.factor(mode)).min();
75  }
76  MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN,
77  MPI_COMM_WORLD);
78 
79  if (global_absmax <= delta1 && global_min >= -delta2) stop = true;
80  stop_iter_time += MPITOC;
81  return stop;
82  }
83 
84  void accelerate() {
85  int iter = this->current_it();
86  if (iter > acc_start) {
87  int num_modes = m_prox_t.modes();
88  double cur_err = this->current_error();
89 
90  double acc_step = std::pow(iter + 1, (1.0 / acc_exp));
91  // Adjust all the factors
92  // Reusing gradient/acceleration term to save memory
93  m_grad_t.zeros();
94  int lowrank = m_prox_t.rank();
95  MAT scalecur = arma::eye<MAT>(lowrank, lowrank);
96  MAT scaleprev = arma::eye<MAT>(lowrank, lowrank);
97  for (int mode = 0; mode < num_modes; mode++) {
98  if (mode == num_modes - 1) {
99  scalecur = arma::diagmat(this->m_local_ncp_factors.lambda());
100  scaleprev = arma::diagmat(m_prev_t.lambda());
101  }
102  // Step Matrix
103  MAT acc_mat = m_grad_t.factor(mode);
104  acc_mat = (scalecur * this->m_local_ncp_factors_t.factor(mode)) -
105  (scaleprev * m_prev_t.factor(mode));
106  acc_mat *= acc_step;
107  acc_mat += (scalecur * this->m_local_ncp_factors_t.factor(mode));
108 
109  if (m_nls_sizes[mode] == 0) {
110  acc_mat.zeros();
111  }
112  m_acc_t.set(mode, acc_mat);
113  m_acc_t.distributed_normalize_rows(mode);
114  }
115  // Compute Error
116  // Always call with mode 0 to reuse MTTKRP if accepted
117  double acc_err = this->computeError(m_acc_t, 0);
118 
119  // Acceleration Accepted
120  if (acc_err < cur_err) {
121  // Set proximal term to current term
122  for (int mode = 0; mode < num_modes; mode++) {
123  m_prox_t.set(mode, m_acc_t.factor(mode));
124  m_prox_t.distributed_normalize_rows(mode);
125  }
126  PRINTROOT("Acceleration Successful::relative_error::" << acc_err);
127  } else {
128  // Acceleration Failed reset to prev iterate
129  this->reset(m_prox_t, true);
130  acc_fails++;
131  if (acc_fails > fail_limit) {
132  acc_fails = 0;
133  acc_exp++;
134  }
135  PRINTROOT("Acceleration Failed::relative_error::" << acc_err);
136  }
137  }
138  }
139 
140  MAT update(const int mode) {
141  double L, mu, lambda, q, alpha, alpha_prev, beta;
142  int outer_iter = this->current_it();
143  int iter = 0;
144  if (mode == 0) {
145  m_prev_t.set_lambda(m_prox_t.lambda());
146  }
147  m_prev_t.set(mode, m_prox_t.factor(mode));
148  MAT Ht(m_prox_t.factor(mode));
149  MAT Htprev = Ht;
150  m_acc_t.set(mode, Ht);
151  modified_gram = this->global_gram;
152  MPITIC;
153  VEC eigval = arma::eig_sym(modified_gram);
154  eig_time += MPITOC;
155  L = eigval.max();
156  mu = eigval.min();
157  lambda = get_lambda(L, mu);
158  modified_gram.diag() += lambda;
159 
160  q = (mu + lambda) / (L + lambda);
161 
162  MAT modified_local_mttkrp_t =
163  (-1 * lambda) * m_acc_t.factor(mode) - this->ncp_local_mttkrp_t[mode];
164 
165  alpha = 1;
166  alpha_prev = 1;
167  beta = 1;
168 
169  while (iter < NUM_INNER_ITERS) {
170  m_grad_t.set(mode, modified_local_mttkrp_t +
171  (modified_gram * m_acc_t.factor(mode)));
172  if (stop_iter(mode)) break;
173 
174  iter++;
175  Htprev = Ht;
176  Ht = m_acc_t.factor(mode) - ((1 / (L + lambda)) * m_grad_t.factor(mode));
177  MPITIC;
178  fixNumericalError<MAT>(&Ht, EPSILON_1EMINUS16);
179  Ht.for_each([](MAT::elem_type &val) { val = val > 0.0 ? val : 0.0; });
180  proj_time += MPITOC;
181  alpha_prev = alpha;
182  alpha = get_alpha(alpha_prev, q);
183  beta =
184  (alpha_prev * (1 - alpha_prev)) / (alpha + alpha_prev * alpha_prev);
185  m_acc_t.set(mode, Ht + beta * (Ht - Htprev));
186  }
187  if (m_nls_sizes[mode] == 0) {
188  Ht.zeros();
189  m_grad_t.set(mode, Ht);
190  }
191  PRINTROOT("Nesterov Update::mode::" << mode
192  << "::outer_iter::" << outer_iter
193  << "::NLS inner_iter::" << iter);
194  m_prox_t.set(mode, Ht);
195  MPITIC;
196  m_prox_t.distributed_normalize_rows(mode);
197  norm_time += MPITOC;
198  return Ht;
199  }
200 
201  public:
202  DistNTFNES(const Tensor &i_tensor, const int i_k, algotype i_algo,
203  const UVEC &i_global_dims, const UVEC &i_local_dims,
204  const UVEC &i_nls_sizes, const UVEC &i_nls_idxs,
205  const NTFMPICommunicator &i_mpicomm)
206  : DistAUNTF(i_tensor, i_k, i_algo, i_global_dims, i_local_dims,
207  i_nls_sizes, i_nls_idxs, i_mpicomm),
208  m_prox_t(i_nls_sizes, i_k, true),
209  m_acc_t(i_nls_sizes, i_k, true),
210  m_grad_t(i_nls_sizes, i_k, true),
211  m_prev_t(i_nls_sizes, i_k, true) {
212  m_prox_t.zeros();
213  m_prev_t.zeros();
214  m_acc_t.zeros();
215  m_grad_t.zeros();
216  modified_gram.zeros(i_k, i_k);
217  delta1 = 1e-2;
218  delta2 = 1e-2;
219  acc_start = 5;
220  acc_exp = 3;
221  acc_fails = 0;
222  fail_limit = 5;
223  eig_time = 0.0;
224  stop_iter_time = 0.0;
225  proj_time = 0.0;
226  norm_time = 0.0;
227  // Set Accerated to be true
228  this->accelerated(true);
229  }
231  PRINTROOT("::eigen time::" << eig_time << "::stop_iter_time::"
232  << stop_iter_time << "::proj_time::" << proj_time
233  << "::norm_time::" << norm_time);
234  }
235 }; // class DistNTFNES
236 
237 } // namespace planc
238 
239 #endif // DISTNTF_DISTNTFNES_HPP_
void accelerated(const bool &set_acceleration)
Does the algorithm need acceleration?
Definition: distauntf.hpp:513
Data is stored such that the unfolding is column major.
Definition: tensor.hpp:32
#define EPSILON_1EMINUS16
Definition: utils.h:43
#define MPITIC
Definition: distutils.h:26
#define MPITOC
Definition: distutils.h:27
algotype
Definition: utils.h:10
void reset(const NCPFactors &new_factors, bool trans=false)
This function will completely reset all the factors and the state of AUNTF.
Definition: distauntf.hpp:529
#define UVEC
Definition: utils.h:58
VEC lambda() const
returns the lambda vector
Definition: ncpfactors.hpp:104
DistNTFNES(const Tensor &i_tensor, const int i_k, algotype i_algo, const UVEC &i_global_dims, const UVEC &i_local_dims, const UVEC &i_nls_sizes, const UVEC &i_nls_idxs, const NTFMPICommunicator &i_mpicomm)
Definition: distntfnes.hpp:202
void set(const int i_n, const MAT &i_factor)
Set the mode i_n with the given factor matrix.
Definition: ncpfactors.hpp:112
int current_it() const
Returns the current outer iteration of the computeNTF.
Definition: distauntf.hpp:496
void set_lambda(const VEC &new_lambda)
sets the lambda vector
Definition: ncpfactors.hpp:117
double current_error() const
Returns the current error.
Definition: distauntf.hpp:498
int rank() const
returns low rank
Definition: ncpfactors.hpp:96
#define MAT
Definition: utils.h:52
int modes() const
returns number of modes
Definition: ncpfactors.hpp:102
void zeros()
this is for reinitializing zeros across different processors.
Definition: ncpfactors.hpp:382
ncp_factors contains the factors of the ncp every ith factor is of size n_i * k number of factors is ...
Definition: ncpfactors.hpp:20
#define PRINTROOT(MSG)
Definition: distutils.h:32
#define VEC
Definition: utils.h:61
MAT & factor(const int i_n) const
factor matrix of a mode i_n
Definition: ncpfactors.hpp:100
VEC lambda()
Returns the lambda of the NCP factors.
Definition: distauntf.hpp:494