3 #ifndef DISTNMF_DISTIO_HPP_ 4 #define DISTNMF_DISTIO_HPP_ 25 template <
class MATTYPE>
// Offset added to MPI_RANK to form the seed index passed to randMatrix.
33 static const int kPrimeOffset = 10;
// Fixed seed given to arma::arma_rng::set_seed in randomLowRank.
35 static const int kW_seed_idx = 1210873;
// kalpha (scale) and kbeta (shift) are applied to generated values as
// kalpha * value + kbeta.
// NOTE(review): two kalpha/kbeta pairs are defined below; presumably a
// preprocessor conditional on lines missing from this listing selects one
// of them -- confirm against the original header.
37 static const int kalpha = 5;
38 static const int kbeta = 10;
40 static const int kalpha = 1;
41 static const int kbeta = 0;
/**
 * Fills *X with random entries, keeping the dimensions *X already has.
 *
 * @param type          "uniform", "lowrank" or "normal"; selects the generator.
 * @param primeseedidx  seed index; -1 requests a random (non-reproducible)
 *                      seed. Handling of other values is on lines missing
 *                      from this listing.
 * @param sparsity      density argument forwarded to sprandu / sprandn.
 * @param X             matrix that is overwritten in place.
 */
51 void randMatrix(
const std::string type,
const int primeseedidx,
52 const double sparsity, MATTYPE* X) {
53 if (primeseedidx == -1) {
54 arma::arma_rng::set_seed_random();
59 DISTPRINTINFO(
"randMatrix::" << primeseedidx <<
"::sp=" << sparsity);
// First generation path: uses the sparse generators and SP_MAT iterators.
// "uniform" and "lowrank" share the uniform generator here.
62 if (type ==
"uniform" || type ==
"lowrank") {
63 (*X).sprandu((*X).n_rows, (*X).n_cols, sparsity);
64 }
else if (type ==
"normal") {
65 (*X).sprandn((*X).n_rows, (*X).n_cols, sparsity);
// Rescale every stored entry to ceil(kalpha * v + kbeta).
67 SP_MAT::iterator start_it = (*X).begin();
68 SP_MAT::iterator end_it = (*X).end();
69 for (SP_MAT::iterator it = start_it; it != end_it; ++it) {
70 double currentValue = (*it);
71 (*it) = ceil(kalpha * currentValue + kbeta);
// Entries that are still negative after rescaling are replaced by kbeta.
72 if ((*it) < 0) (*it) = kbeta;
// Second generation path, operating on the whole matrix at once.
// NOTE(review): presumably the dense-matrix variant selected by a
// preprocessor conditional that is not visible here -- confirm.
83 if (type ==
"uniform") {
85 }
else if (type ==
"normal") {
88 (*X) = kalpha * (*X) + kbeta;
// Zero out every element that is negative after the affine rescale.
90 (*X).elem(find((*X) < 0)).zeros();
/**
 * Computes a per-column norm of the distributed input matrix and replicates
 * it into normmat, a matrix with the shape of m_A.
 *
 * @param i_normtype  L2NORM uses the column sum of squares; MAXNORM uses the
 *                    column maximum.
 *
 * Three groups of branches are visible below. NOTE(review): they are
 * presumably the cases of a dispatch on the distribution type whose
 * case labels are on lines missing from this listing -- confirm. The
 * statement that applies normmat to m_A is also not visible here.
 */
96 void normalize(
normtype i_normtype) {
97 ROWVEC globalnormA = arma::zeros<ROWVEC>(m_A.n_cols);
98 ROWVEC normc = arma::zeros<ROWVEC>(m_A.n_cols);
99 MATTYPE normmat = arma::zeros<MATTYPE>(m_A.n_rows, m_A.n_cols);
// Group 1: local column norms of m_A reduced over MPI_COMM_WORLD.
102 if (i_normtype ==
L2NORM) {
103 normc = arma::sum(arma::square(m_A));
104 MPI_Allreduce(normc.memptr(), globalnormA.memptr(), m_A.n_cols,
105 MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
107 }
else if (i_normtype ==
MAXNORM) {
108 normc = arma::max(m_A);
109 MPI_Allreduce(normc.memptr(), globalnormA.memptr(), m_A.n_cols,
110 MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
// Group 2: norms taken directly from m_Arows, no communication.
116 if (i_normtype ==
L2NORM) {
117 globalnormA = arma::sum(arma::square(m_Arows));
118 }
else if (i_normtype ==
MAXNORM) {
// NOTE(review): this takes the max of the SQUARED entries, unlike the other
// MAXNORM branches, which use arma::max(m_A) -- confirm this is intended.
119 globalnormA = arma::max(arma::square(m_Arows));
// Group 3: local column norms of m_A reduced over the sub-communicator
// commSubs()[1].
123 if (i_normtype ==
L2NORM) {
124 normc = arma::sum(arma::square(m_A));
125 MPI_Allreduce(normc.memptr(), globalnormA.memptr(), m_A.n_cols,
126 MPI_DOUBLE, MPI_SUM, this->m_mpicomm.
commSubs()[1]);
127 }
else if (i_normtype ==
MAXNORM) {
128 normc = arma::max(m_A);
// The global column maximum is the MAX of the per-process maxima, matching
// the MAXNORM reduction in group 1. This previously used MPI_SUM, which
// added the local maxima together.
129 MPI_Allreduce(normc.memptr(), globalnormA.memptr(), m_A.n_cols,
130 MPI_DOUBLE, MPI_MAX, this->m_mpicomm.
commSubs()[1]);
// Fallback diagnostic when none of the groups above applies.
134 INFO <<
"cannot normalize" << std::endl;
// Replicate the row vector of column norms down all rows of m_A.
136 normmat = arma::repmat(globalnormA, m_A.n_rows, 1);
/**
 * Overwrites *X with values computed from the product of two factors, Wrnd
 * and Hrnd, restricted to the slice of the global m x n matrix held by this
 * process. Wrnd and Hrnd are created on lines missing from this listing.
 *
 * @param m  global number of rows; compared with (*X).n_rows to detect that
 *           this process holds all rows.
 * @param n  global number of columns; compared with (*X).n_cols to detect
 *           that this process holds all columns.
 * @param k  not used in the visible lines; presumably the inner dimension of
 *           Wrnd / Hrnd -- TODO confirm.
 * @param X  local matrix that is overwritten in place.
 */
145 void randomLowRank(
const UWORD m,
const UWORD n,
const UWORD k, MATTYPE* X) {
// Inclusive row/column bounds of this process's slice. Only the end_*
// assignments are visible; the start_* assignments and the conditions that
// select between the groups below are on lines missing from this listing.
146 uint start_row = 0, start_col = 0;
147 uint end_row = 0, end_col = 0;
151 end_row = ((
MPI_RANK + 1) * (*X).n_rows) - 1;
153 end_col = (*X).n_cols - 1;
157 end_row = (*X).n_rows - 1;
159 end_col = ((
MPI_RANK + 1) * (*X).n_cols) - 1;
// X spans every column: the slice is a block of rows indexed by MPI_RANK.
165 if ((*X).n_cols == n) {
167 end_row = ((
MPI_RANK + 1) * (*X).n_rows) - 1;
// X spans every row: the slice is a block of columns indexed by MPI_RANK.
171 if ((*X).n_rows == m) {
175 end_col = ((
MPI_RANK + 1) * (*X).n_cols) - 1;
// Seed the RNG with the fixed constant kW_seed_idx.
187 arma::arma_rng::set_seed(kW_seed_idx);
// Sparse path: only entries already stored in *X are rewritten, each with
// ceil(kalpha * <Wrnd row, Hrnd column> + kbeta) at its global coordinates.
193 SP_MAT::iterator start_it = (*X).begin();
194 SP_MAT::iterator end_it = (*X).end();
195 double tempVal = 0.0;
196 for (SP_MAT::iterator it = start_it; it != end_it; ++it) {
197 VEC Wrndi = vectorise(Wrnd.row(start_row + it.row()));
198 VEC Hrndj = Hrnd.col(start_col + it.col());
199 tempVal = dot(Wrndi, Hrndj);
200 (*it) = ceil(kalpha * tempVal + kbeta);
// Whole-matrix path: multiply only the factor slices this process needs.
204 if ((*X).n_cols == n) {
205 MAT myWrnd = Wrnd.rows(start_row, end_row);
206 templr = myWrnd * Hrnd;
207 }
else if ((*X).n_rows == m) {
208 MAT myHcols = Hrnd.cols(start_col, end_col);
209 templr = Wrnd * myHcols;
210 }
// NOTE(review): these comparisons use integer division (m / MPI_SIZE,
// m / pr(), ...), so they match only the truncated local sizes.
else if ((((*X).n_rows == (m /
MPI_SIZE)) && ((*X).n_cols == (n /
MPI_SIZE))) ||
211 (((*X).n_rows == (m / this->m_mpicomm.pr())) &&
212 ((*X).n_cols == (n / this->m_mpicomm.
pc())))) {
213 MAT myWrnd = Wrnd.rows(start_row, end_row);
214 MAT myHcols = Hrnd.cols(start_col, end_col);
215 templr = myWrnd * myHcols;
// Same affine rescale and rounding as the sparse path, applied elementwise.
217 (*X) = ceil(kalpha * templr + kbeta);
/**
 * Prepares coordinate (locs) and value (vals) arrays describing A trimmed to
 * dimensions that are common to all processes and divisible by the processor
 * grid.
 *
 * Visible steps: take the largest local row/column counts over
 * MPI_COMM_WORLD, round them down to multiples of pr() / pc(), scan the
 * stored entries inside those bounds, reduce a minimum value over all
 * processes, and make sure an entry exists at (max_rows - 1, max_cols - 1).
 * How locs / vals are turned back into A is on lines missing from this
 * listing.
 *
 * @param A  local sparse matrix; read here through its iterators.
 */
222 void uniform_dist_matrix(MATTYPE&
A) {
227 unsigned int max_rows = 0, max_cols = 0;
228 unsigned int my_rows =
A.n_rows;
229 unsigned int my_cols =
A.n_cols;
230 bool last_exist =
false;
231 double my_min_value = 0.0;
233 UWORD my_correct_nnz = 0;
234 if (
A.n_nonzero > 0) {
235 my_min_value =
A.values[0];
// The buffers are unsigned int, so the MPI datatype must be MPI_UNSIGNED
// (this previously declared them as MPI_INT).
237 MPI_Allreduce(&my_rows, &max_rows, 1, MPI_UNSIGNED, MPI_MAX, MPI_COMM_WORLD);
238 MPI_Allreduce(&my_cols, &max_cols, 1, MPI_UNSIGNED, MPI_MAX, MPI_COMM_WORLD);
// Round down to a multiple of the processor-grid dimensions.
240 max_rows -= (max_rows % m_mpicomm.
pr());
241 max_cols -= (max_cols % m_mpicomm.
pc());
// First pass over the stored entries that lie inside the trimmed bounds.
245 if (
A.n_nonzero > 0) {
246 SP_MAT::iterator start_it =
A.begin();
247 SP_MAT::iterator end_it =
A.end();
248 for (SP_MAT::iterator it = start_it; it != end_it; ++it) {
249 if (it.row() < max_rows && it.col() < max_cols) {
// NOTE(review): the variable is named "min" but the test is
// my_min_value < *it; the assignment it guards is on a missing line, so the
// intended direction should be confirmed against the original header.
251 if (*it != 0 && my_min_value < *it) {
// Detect whether the bottom-right corner of the trimmed matrix is stored.
255 if (it.row() == max_rows - 1 && it.col() == max_cols - 1) {
264 if (
A.n_nonzero == 0) {
// my_min_value is a double, so the reduction must be declared MPI_DOUBLE;
// MPI_INT made MPI reinterpret part of the double's bytes as an integer.
// Assumes overall_min (declared on a missing line) is also double -- confirm.
267 MPI_Allreduce(&my_min_value, &overall_min, 1, MPI_DOUBLE, MPI_MIN,
271 DISTPRINTINFO(
"max_rows::" << max_rows <<
"::max_cols::" << max_cols
272 <<
"::my_rows::" << my_rows <<
"::my_cols::" 273 << my_cols <<
"::last_exist::" << last_exist
274 <<
"::my_nnz::" <<
A.n_nonzero
275 <<
"::my_correct_nnz::" << my_correct_nnz);
// One column of locs (row index, column index) and one vals entry per
// retained nonzero.
276 locs = arma::zeros<arma::umat>(2, my_correct_nnz);
277 vals = arma::zeros<VEC>(my_correct_nnz);
// Second pass: record the coordinates of every in-bounds entry.
278 if (
A.n_nonzero > 0) {
279 SP_MAT::iterator start_it =
A.begin();
280 SP_MAT::iterator end_it =
A.end();
282 for (SP_MAT::iterator it = start_it; it != end_it; ++it) {
283 if (it.row() < max_rows && it.col() < max_cols) {
284 locs(0, idx) = it.row();
285 locs(1, idx) = it.col();
292 vals(0) = overall_min;
// If the corner entry was not stored, use the last slot to add it with the
// global minimum value.
294 if (
A.n_nonzero > 0 && !last_exist) {
295 locs(0, my_correct_nnz - 1) = max_rows - 1;
296 locs(1, my_correct_nnz - 1) = max_cols - 1;
297 vals(my_correct_nnz - 1) = overall_min;
// On failure, emit the same diagnostic fields as the DISTPRINTINFO above.
302 }
catch (
const std::exception& e) {
304 <<
"max_rows::" << max_rows <<
"::max_cols::" << max_cols
305 <<
"::my_rows::" << my_rows <<
"::my_cols::" << my_cols
306 <<
"::last_exist::" << last_exist <<
"::my_nnz::" 307 <<
A.n_nonzero <<
"::my_correct_nnz::" << my_correct_nnz);
314 : m_mpicomm(mpic), m_distio(iod) {}
// Body fragment of the input reader; its signature is on lines missing from
// this listing. A file name that starts with "rand_" requests a generated
// matrix; anything else is loaded from disk.
338 std::string rand_prefix(
"rand_");
// compare() returns 0 on a match, so this branch handles the "rand_" prefix.
339 if (!file_name.compare(0, rand_prefix.size(), rand_prefix)) {
// The text after the prefix names the generator type.
340 std::string type = file_name.substr(rand_prefix.size());
341 assert(type ==
"normal" || type ==
"lowrank" || type ==
"uniform");
// Each group below seeds with MPI_RANK + kPrimeOffset and, for "lowrank",
// overwrites the generated values via randomLowRank. The conditions that
// select which group runs are on lines missing from this listing.
345 randMatrix(type,
MPI_RANK + kPrimeOffset, sparsity, &m_Arows);
346 if (type ==
"lowrank") {
347 randomLowRank(m, n, k, &m_Arows);
352 randMatrix(type,
MPI_RANK + kPrimeOffset, sparsity, &m_Acols);
353 if (type ==
"lowrank") {
354 randomLowRank(m, n, k, &m_Acols);
// Both a row block (m / p rows) and a column block (n / p columns) are
// allocated and generated. NOTE(review): m / p and n / p truncate when the
// operands are integers -- confirm the handling of remainders.
359 m_Arows.set_size(m / p, n);
360 m_Acols.set_size(m, n / p);
361 randMatrix(type,
MPI_RANK + kPrimeOffset, sparsity, &m_Arows);
362 if (type ==
"lowrank") {
363 randomLowRank(m, n, k, &m_Arows);
365 randMatrix(type,
MPI_RANK + kPrimeOffset, sparsity, &m_Acols);
366 if (type ==
"lowrank") {
367 randomLowRank(m, n, k, &m_Acols);
// A single local block of size (m / pr) x (n / pc) is generated into m_A.
372 m_A.zeros(m / pr, n / pc);
373 randMatrix(type,
MPI_RANK + kPrimeOffset, sparsity, &m_A);
374 if (type ==
"lowrank") {
375 randomLowRank(m, n, k, &m_A);
// File-based input: sr / sc hold the per-process file names, which are
// assembled on lines missing from this listing.
380 std::stringstream sr, sc;
// Two load variants are visible for each matrix: coordinate-ASCII followed by
// uniform_dist_matrix, or a plain load() with the default format.
384 m_Arows.load(sr.str(), arma::coord_ascii);
385 uniform_dist_matrix(m_Arows);
387 m_Arows.load(sr.str());
393 m_Acols.load(sc.str(), arma::coord_ascii);
394 uniform_dist_matrix(m_Acols);
396 m_Acols.load(sc.str());
// The column matrix is transposed after loading.
398 m_Acols = m_Acols.t();
400 if (m_distio ==
TWOD) {
// temp_ijv is read as a raw ASCII table: columns 0-1 are used as indices and
// column 2 as values.
405 temp_ijv.load(sr.str(), arma::raw_ascii);
406 if (temp_ijv.n_rows > 0 && temp_ijv.n_cols > 0) {
// NOTE(review): vals is sized (2, n_rows) here but is reassigned from
// temp_ijv.col(2) a few lines below, so this initial size is discarded.
407 MAT vals(2, temp_ijv.n_rows);
408 MAT idxs_only = temp_ijv.cols(0, 1);
409 arma::umat idxs = arma::conv_to<arma::umat>::from(idxs_only);
// Transposed so that each column holds one (row, column) index pair.
410 arma::umat idxst = idxs.t();
411 vals = temp_ijv.col(2);
412 SP_MAT temp_spmat(idxst, vals);
// Presumably the branch for an empty table (the `else` is on a missing line):
// builds a sparse matrix from a single zero entry at (0, 0).
415 arma::umat idxs = arma::zeros<arma::umat>(2, 1);
416 VEC vals = arma::zeros<VEC>(1);
417 SP_MAT temp_spmat(idxs, vals);
421 uniform_dist_matrix(m_A);
// Normalization is applied only when a type other than NONE was requested.
428 if (i_normalization !=
NONE) {
429 normalize(i_normalization);
// Tail of an output-writing signature and its body: saves W and H in raw
// ASCII format to the paths held in sw and sh. The code that composes those
// paths from output_file_name is on lines missing from this listing.
440 const std::string& output_file_name) {
441 std::stringstream sw, sh;
444 W.save(sw.str(), arma::raw_ascii);
445 H.save(sh.str(), arma::raw_ascii);
// Body fragment (the enclosing signature is not visible in this listing) that
// writes the locally held input matrices to files; the file-name base is
// "Arnd" and the full paths are composed on missing lines.
448 std::string file_name(
"Arnd");
449 std::stringstream sr, sc;
450 if (m_distio ==
TWOD) {
// Each matrix has two save variants, coordinate-ASCII and raw-ASCII.
// NOTE(review): presumably selected by a sparse/dense preprocessor
// conditional that is not visible here -- confirm.
456 this->m_A.save(sr.str(), arma::coord_ascii);
458 this->m_A.save(sr.str(), arma::raw_ascii);
466 this->m_Arows.save(sr.str(), arma::coord_ascii);
468 this->m_Arows.save(sr.str(), arma::raw_ascii);
476 this->m_Acols.save(sc.str(), arma::coord_ascii);
478 this->m_Acols.save(sc.str(), arma::raw_ascii);
482 const MATTYPE&
Arows()
const {
return m_Arows; }
483 const MATTYPE&
Acols()
const {
return m_Acols; }
484 const MATTYPE&
A()
const {
return m_A; }
// Body fragment of a test routine (its signature is not visible in this
// listing): prints this process's rank followed by the row and column
// matrices, each converted to a dense MAT for display.
499 INFO <<
"Arows:" << mpicomm.
rank() << std::endl
500 << arma::conv_to<MAT>::from(dio.
Arows()) << std::endl;
501 INFO <<
"Acols:" << mpicomm.
rank() << std::endl
502 << arma::conv_to<MAT>::from(dio.
Acols()) << std::endl;
505 #endif // DISTNMF_DISTIO_HPP_ const MPICommunicator & mpicomm() const
void readInput(const std::string file_name, UWORD m=0, UWORD n=0, UWORD k=0, double sparsity=0, UWORD pr=0, UWORD pc=0, normtype i_normalization=NONE)
The arguments m, n, pr, and pc are needed only for random matrices.
int random_sieve(const int)
#define DISTPRINTINFO(MSG)
const int pc() const
Total number of column processors.
const int pr() const
Total number of row processors.
const MATTYPE & A() const
const MATTYPE & Acols() const
void testDistIO(char argc, char *argv[])
const MATTYPE & Arows() const
const int rank() const
returns the global rank
ncp_factors contains the factors of the ncp; every i-th factor is of size n_i * k; the number of factors is ...
void writeOutput(const MAT &W, const MAT &H, const std::string &output_file_name)
Writes the factor matrix as output_file_name_W_MPISIZE_MPIRANK.
const MPI_Comm * commSubs() const