planc
Parallel Low-rank Approximation with Non-negativity Constraints
ddttensor.hpp
1 /* Copyright Koby Hayashi 2018 */
2 
3 #ifndef DIMTREE_DDTTENSOR_HPP_
4 #define DIMTREE_DDTTENSOR_HPP_
5 
6 #include "dimtree/ddttensor.h"
7 
8 /*
9  Element wise multiplication between two vectors
10 */
11 
12 void vdMul(long int n, double *a, double *b, double *c) {
13  for (long int i = 0; i < n; i++) c[i] = a[i] * b[i];
14 }
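As a sanity sketch of the kernel (a hypothetical fragment, not part of the header): chaining vdMul calls accumulates the Hadamard product of several vectors, which is exactly the pattern the partial-Hadamard machinery below relies on.

double a[3] = {1.0, 2.0, 3.0};
double b[3] = {4.0, 5.0, 6.0};
double c[3], d[3];
vdMul(3, a, b, c); // c = {4, 10, 18}
vdMul(3, c, a, d); // d = a .* a .* b = {4, 20, 54}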
15 
16 /*
17  Prints a matrix in column major order
18 */
19 void printM_ColMajor(double *M, long int num_cols, long int num_rows) {
20  long int i, j;
21 
22  printf("\n");
23 
24  for (j = 0; j < num_rows; j++) {
25  for (i = 0; i < num_cols; i++) {
26  if (i == 0) {
27  printf("%0.10lf", M[i * num_rows + j]);
28  } else {
29  printf(",%0.10lf", M[i * num_rows + j]);
30  }
31  }
32  printf("\n");
33  }
34 }
35 
36 /*
37  Prints a matrix in row major order
38 */
39 void printM_RowMajor(double *M, long int num_cols, long int num_rows) {
40  long int i, j;
41 
42  printf("\n");
43 
44  for (j = 0; j < num_rows; j++) {
45  for (i = 0; i < num_cols; i++) {
46  if (i == 0) {
47  printf("%0.10lf", M[j * num_cols + i]);
48  } else {
49  printf(",%0.10lf", M[j * num_cols + i]);
50  }
51  }
52  printf("\n");
53  }
54 }
55 
56 void print_Ktensor_RowMajor(ktensor *Y) {
57  long int i;
58 
59  printf("=========Ktensor========\n");
60  printf("Rank = %ld\n", Y->rank);
61  printf("Nmodes = %ld\n", Y->nmodes);
62  printf("Dims = [");
63  for (i = 0; i < Y->nmodes; i++) {
64  if (i != Y->nmodes - 1)
65  printf("%ld,", Y->dims[i]);
66  else
67  printf("%ld]\n", Y->dims[i]);
68  }
69  printf("Num_Eles = %ld\n", Y->dims_product);
70  printf("Lambdas = [");
71  for (i = 0; i < Y->rank; i++) {
72  if (i != Y->rank - 1)
73  printf("%.5lf,", Y->lambdas[i]);
74  else
75  printf("%.5lf]\n", Y->lambdas[i]);
76  }
77  for (i = 0; i < Y->nmodes; i++) {
78  printf("Factor[%ld]", i);
79  printM_RowMajor(Y->factors[i], Y->rank, Y->dims[i]);
80  }
81  printf("========================\n");
82 }
83 
84 /*
85  Prints the first mode, ColMajor matricization of a tensor struct
86  1) T, pointer to the tensor to print
87  2) show_data, flag saying if you want T->data to be printed
88 */
89 void print_tensor(tensor *T, long int show_data) {
90  long int i;
91  printf("==========Tensor========\n");
92  printf("T->nmodes = %ld\n", T->nmodes);
93  printf("T->dims_product = %ld\n", T->dims_product);
94  printf("T->dims[");
95  for (i = 0; i < T->nmodes; i++) {
96  printf("%ld,", T->dims[i]);
97  }
98  printf("]\n");
99 
100  if (show_data == 1) {
101  printf("T->data:");
102  printM_ColMajor(T->data, T->dims[0], T->dims_product / T->dims[0]);
103  }
104  printf("========================\n");
105 }
106 
107 /*
108  Prints inputs to a cblas_dgemm function call.
109 */
110 void print_dgemm_inputs(CBLAS_ORDER dgemm_layout, CBLAS_TRANSPOSE transA,
111  CBLAS_TRANSPOSE transB, long int m, long int n,
112  long int k, double alpha, long int strideA,
113  long int strideB, double beta, long int strideC) {
114  printf("-------------------------------\n");
115  printf("dgemm_inputs :: \n");
116 
117  if (dgemm_layout == CblasColMajor) {
118  printf("ColMajor\n");
119  } else {
120  printf("RowMajor\n");
121  }
122 
123  if (transA == CblasNoTrans) {
124  printf("NoTrans\n");
125  } else {
126  printf("Trans\n");
127  }
128 
129  if (transB == CblasNoTrans) {
130  printf("NoTrans\n");
131  } else {
132  printf("Trans\n");
133  }
134 
135  printf("m = %ld\n", m);
136  printf("n = %ld\n", n);
137  printf("k = %ld\n", k);
138  printf("alpha = %lf\n", alpha);
139  printf("strideA = %ld\n", strideA);
140  printf("strideB = %ld\n", strideB);
141  printf("beta = %lf\n", beta);
142  printf("strideC = %ld\n", strideC);
143  printf("-------------------------------\n");
144 }
145 
146 /*
147  Prints inputs to a cblas_dgemv function call.
148 */
149 void print_dgemv_inputs(CBLAS_ORDER dgemv_layout, CBLAS_TRANSPOSE transA,
150  long int m, long int n, double alpha, long int T_offset,
151  long int tensor_stride, long int A_offset,
152  long int A_stride, double beta,
153  long int output_col_stride, long int output_stride) {
154  printf("-------------------------------\n");
155  printf("dgemv_inputs :: \n");
156 
157  if (dgemv_layout == CblasColMajor) {
158  printf("ColMajor\n");
159  } else {
160  printf("RowMajor\n");
161  }
162 
163  if (transA == CblasNoTrans) {
164  printf("NoTrans\n");
165  } else {
166  printf("Trans\n");
167  }
168  printf("m = %ld\n", m);
169  printf("n = %ld\n", n);
170  printf("alpha = %lf\n", alpha);
171  printf("T_offset = %ld\n", T_offset);
172  printf("tensor_stride= %ld\n", tensor_stride);
173  printf("A_offset = %ld\n", A_offset);
174  printf("A_stride = %ld\n", A_stride);
175  printf("beta = %lf\n", beta);
176  printf("output_col_stride = %ld\n", output_col_stride);
177  printf("output_stride = %ld\n", output_stride);
178  printf("-------------------------------\n");
179 }
180 
181 /*
182  MTTKRP_RowMajor();
183  reorder_Factors();
184  Description -- reorders the factor matrices for the
185  MTTKRP_RowMajor function. Skips over the nth factor;
186  for example, skipping over 2 gives
187  {0,1,2,3,4}->{0,1,3,4}. Also reorders the Dims.
188 */
189 void reorder_Factors(ktensor *Y, double **reordered_Factors,
190  long int *reordered_Dims, long int n) {
191  long int i, j;
192  for (i = 0, j = 0; i < Y->nmodes; i++) {
193  if (i != n) {
194  reordered_Factors[j] = Y->factors[i];
195  reordered_Dims[j] = Y->dims[i];
196  j++;
197  }
198  }
199 }
200 /*
201  MTTKRP_RowMajor();
202  update_Partial_Hadamards();
203 */
204 void update_Partial_Hadamards(ktensor *Y, long int *indexers,
205  double *partial_Hadamards,
206  double **reordered_Factors) {
207  // check to see where we need to update from
208  long int update_from, i;
209 
210  /*
211  Iterate over the indexers array looking for a nonzero entry.
212  While the ith place is 0 keep searching; you will always hit a nonzero
213  before the end, because if the entries are all zero we have either just
214  started or just ended the KR product. update_from will hold the index of
215  the last factor matrix to cycle, the one we should update from, including
216  that factor matrix.
217  */
218  for (i = 0; i < Y->nmodes - 1; i++) {
219  update_from = i;
220  if (indexers[i] != 0) break;
221  }
222 
223  // if update_from is 0 we do not have to update any partials
224  if (update_from == 0) {
225  return;
226  } else if (update_from >= Y->nmodes - 3) {
227  /*
228  If update_from is the second to last index or the last index,
229  we have to update all of the partials
230  */
231  for (i = Y->nmodes - 4; i >= 0;
232  i--) { // start from the rightmost partial; Y->nmodes-4 is the last
233  // index of the partial_Hadamards array; i iterates over the
234  // partials array
235  if (i == Y->nmodes - 4) { // on the first iteration we need to access the
236  // last and second-to-last factor matrices
237  /*
238  call vdMul to get the Hadamard product of the last two factor
239  matrices: 1) length of the vectors 2) second-to-last factor matrix row 3)
240  last factor matrix row 4) output location in partial_Hadamards
241  */
242  vdMul(Y->rank,
243  &reordered_Factors[Y->nmodes - 3]
244  [Y->rank * indexers[Y->nmodes - 3]],
245  &reordered_Factors[Y->nmodes - 2]
246  [Y->rank * indexers[Y->nmodes - 2]],
247  &partial_Hadamards[Y->rank * i]);
248  } else { // we are not updating the last partial_Hadamard so we can use a
249  // previous partial
250  /*
251  call vdMul to get the Hadamard product of the current factor
252  matrix row and the partial_Hadamard one stride to the right.
253  Note: the factor whose row we want is i+1
254  1) the length of the vectors
255  2) the (i+1)th factor matrix's row
256  3) the partial_Hadamard one stride to the right
257  4) output location in partial_Hadamards
258  */
259  vdMul(Y->rank, &reordered_Factors[i + 1][Y->rank * indexers[i + 1]],
260  &partial_Hadamards[Y->rank * (i + 1)],
261  &partial_Hadamards[Y->rank * i]);
262  }
263  }
264  } else { // the case that update_from < Y->nmodes - 3: not the last or
265  // second-to-last index
266  for (i = update_from - 1; i >= 0; i--) {
267  /*
268  call vdMul to update the partial_Hadamards to the left of and
269  including i
270  */
271  vdMul(Y->rank, &reordered_Factors[i + 1][Y->rank * indexers[i + 1]],
272  &partial_Hadamards[Y->rank * (i + 1)],
273  &partial_Hadamards[Y->rank * i]);
274  }
275  }
276 }
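In symbols, a sketch of the invariant this function maintains: with M reordered factors A_0, ..., A_{M-1} (here M = Y->nmodes - 1) and current row indices r_0, ..., r_{M-1}, each partial stores a suffix of the row-wise Hadamard product,

partial[i] = A_{i+1}(r_{i+1},:) .* A_{i+2}(r_{i+2},:) .* ... .* A_{M-1}(r_{M-1},:), for 0 <= i <= M-3,

and the emitted KRP row is A_0(r_0,:) .* partial[0]. Incrementing the mixed-radix counter only changes rows up to index update_from, so only the partials whose suffix contains one of those rows need recomputing.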
277 
278 void KR_RowMajor(ktensor *Y, double *C, long int n) {
279  long int i, j, lF, rF;
280  if (n != Y->nmodes - 1)
281  lF = Y->nmodes - 1;
282  else
283  lF = Y->nmodes - 2;
284  rF = lF - 1;
285  if (rF == n) rF--;
286 
287  for (i = 0; i < Y->dims[lF]; i++) { // loop over the rows of the left matrix
288  for (j = 0; j < Y->dims[rF];
289  j++) { // loop over the rows of the right matrix
290  vdMul(Y->rank, &Y->factors[lF][i * Y->rank], &Y->factors[rF][j * Y->rank],
291  &C[i * Y->rank * Y->dims[rF] + j * Y->rank]);
292  }
293  }
294 }
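A concrete sketch of the row layout this produces (hypothetical fragment using plain arrays): for a left factor with dL rows and a right factor with dR rows, row i*dR + j of the KRP is the element-wise product of left row i and right row j.

// rank 2; left L is 2x2, right R is 3x2, output C is 6x2, all row major
double L[4] = {1, 2, 3, 4};
double R[6] = {1, 1, 2, 2, 3, 3};
double C[12];
for (long int i = 0; i < 2; i++)
  for (long int j = 0; j < 3; j++)
    vdMul(2, &L[i * 2], &R[j * 2], &C[(i * 3 + j) * 2]);
// C rows: {1,2} {2,4} {3,6} {3,4} {6,8} {9,12}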
295 
296 /*
297  Multi_KR_RowMajor();
298  1) Y, ktensor of factor matrices to form the KRP from
299  2) C, output matrix
300  3) n, the mode to skip over; only used for the Y->nmodes == 3 case
301 */
302 
303 void Multi_KR_RowMajor(ktensor *Y, double *C, long int n) {
304  if (Y->nmodes == 3) {
305  KR_RowMajor(Y, C, n);
306  } else {
307  long int i, j, *indexers, *reordered_Dims;
308  double *partial_Hadamards, **reordered_Factors;
309 
310  reordered_Factors = reinterpret_cast<double **>(
311  malloc(sizeof(double *) * (Y->nmodes - 1)));
312  reordered_Dims = (long int *)malloc(sizeof(long int) * (Y->nmodes - 1));
313  indexers = (long int *)malloc(sizeof(long int) *
314  (Y->nmodes - 1)); // one indexer per remaining mode
315  partial_Hadamards = reinterpret_cast<double *>(
316  malloc(sizeof(double) * Y->rank *
317  (Y->nmodes - 3))); // N-3 partial vectors, for intermediate products
318 
319  // reorder the Factors and Dimensions
320  reorder_Factors(Y, reordered_Factors, reordered_Dims, n);
321 
322  memset(indexers, 0, sizeof(long int) * (Y->nmodes - 1));
323 
324  // initialize the partial_Hadamards to the starting values, i indexes the
325  // partial_Hadamards
326  for (i = Y->nmodes - 4; i >= 0; i--) {
327  if (i == Y->nmodes - 4) {
328  vdMul(Y->rank, &reordered_Factors[Y->nmodes - 3][0],
329  &reordered_Factors[Y->nmodes - 2][0],
330  &partial_Hadamards[Y->rank * i]);
331  } else {
332  vdMul(Y->rank, &reordered_Factors[i + 1][0],
333  &partial_Hadamards[Y->rank * (i + 1)],
334  &partial_Hadamards[Y->rank * i]);
335  }
336  }
337 
338  // Iterate over the rows of the KRP matrix
339  for (i = 0; i < Y->dims_product / Y->dims[n]; i++) {
340  /*
341  compute the current row using vdMul
342  1) Y->rank, length of a row
343  2) always some row of the 0th factor matrix
344  3) always the first partial_Hadamard
345  4) the current row of the KR matrix
346  */
347  vdMul(Y->rank, &reordered_Factors[0][Y->rank * indexers[0]],
348  &partial_Hadamards[0], &C[Y->rank * i]);
349 
350  // update the indexers array
351  for (j = 0; j < Y->nmodes - 1; j++) {
352  indexers[j] += 1;
353  if (indexers[j] >= reordered_Dims[j]) {
354  indexers[j] = 0;
355  } else {
356  break;
357  }
358  }
359 
360  update_Partial_Hadamards(Y, indexers, partial_Hadamards,
361  reordered_Factors);
362  }
363 
364  free(indexers);
365  free(partial_Hadamards);
366  free(reordered_Dims);
367  free(reordered_Factors);
368  }
369 }
370 
371 // you need to include the diagonal
372 void Upper_Hadamard_RowMajor(long int nRows, long int nCols, double *A,
373  double *B, double *C) {
374  long int i, j;
375 
376  for (i = 0; i < nRows; i++) {
377  for (j = i; j < nCols; j++) {
378  C[i * nCols + j] = A[i * nCols + j] * B[i * nCols + j];
379  }
380  }
381 }
382 
383 /*
384  CompareM()
385  Description --
386  A function for comparing two matrices to see if they differ
387  significantly in some value. The last input option is your tolerance
388  for the difference between A[i,j] and B[i,j]; if you pass -1, the MATLAB
389  eps*100 will be used. 1 -> true, 0 -> false
390  Variables --
391  A) matrix to compare against B
392  B) matrix to compare against A
393  nRows) the number of rows in A and B
394  nCols) the number of columns in A and B
395  eps) the tolerance for differences between A and B
396 */
397 long int CompareM(double *A, double *B, long int nRows, long int nCols,
398  double eps) {
399  if (eps == -1.0) eps = 2.220446049250313E-16;
400  eps = eps * 100;
401  double max_dif = eps;
402  long int are_same = 1; // 1 -> true, 0 -> false
403  for (long int i = 0; i < nRows * nCols; i++) {
404  if (fabs(A[i] - B[i]) > max_dif) {
405  max_dif = fabs(A[i] - B[i]);
406  are_same = 0;
407  }
408  }
409  printf("Max_dif=%.20lf\n", max_dif);
410  return are_same;
411 }
412 
413 void MTTKRP_RowMajor(tensor *T, double *K, double *C, long int rank,
414  long int n) {
415  if (T == NULL) {
416  printf("Tensor pointer is null, error\n");
417  exit(-1);
418  }
419  if (K == NULL) {
420  printf("Khatri Rhao product pointer is null, error\n");
421  exit(-2);
422  }
423  if (C == NULL) {
424  printf("Output Matrix is null!\n");
425  exit(-3);
426  }
427  if (rank < 1) {
428  printf("Rank must be greater than 1!\n");
429  exit(-4);
430  }
431  if (n > T->nmodes - 1) {
432  printf("n is larger than the number of modes in the tensor, T->nmodes!\n");
433  exit(-5);
434  }
435  if (n < 0) {
436  printf("n cannot be a negative number!\n");
437  exit(-6);
438  }
439 
440  // long int i, nDim;
441  int i, nDim;
442  double alpha, beta;
443 
444  // for calling dgemm_
445  // char nt = 'N';
446  // char t = 'T';
447  // int i_n = n;
448  // int i_rank = rank;
449 
450  if (n == 0) {
451  // long int ncols = 1;
452  int ncols = 1;
453 
454  ncols = T->dims_product / T->dims[n];
455  alpha = 1.0;
456  beta = 0.0;
457  nDim = T->dims[n];
458  /*
459  This dgemm call performs the column-wise matrix multiply between
460  the first mode matricization of the tensor and the matrix K. Operation is
461  X_1 x K
462  Inputs --
463  1) CblasColMajor indicates the matrices are in column major format
464  2) CblasNoTrans do not transpose the left matrix, T->data
465  3) CblasNoTrans do not transpose the right matrix, K
466  4) nDim, the number of rows in T->data
467  5) rank, the number of columns in K
468  6) ncols, the shared dimension, the number of columns in T->data
469  and rows in K
470  7) alpha
471  8) T->data, the left matrix, first mode matricization of the tensor
472  9) nDim, the leading dimension of T->data, the distance between
473  columns of T->data
474  10) K, the right matrix, khatri rao product
475  11) ncols, the distance between columns in the K matrix
476  12) beta
477  13) C, the output matrix,
478  14) nDim the distance between columns in C
479  */
480 
481  // printf("Input args to cblas_dgemm
482  // function\nnDim=%ld\nrank=%ld\n,ncols=%ld\n",*(T->dims+n-1), rank, ncols);
483  // This does M*KR
484  // cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, nDim, rank, ncols,
485  // alpha, T->data, nDim, K, rank, beta, C, rank );
486  // This does KR'*M'
487  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, rank, nDim, ncols,
488  alpha, K, rank, T->data, nDim, beta, C, rank);
489  // dgemm_(&nt, &t, &i_rank, &nDim, &ncols, &alpha, K, &i_rank, T->data,
490  // &nDim,
491  // &beta, C, &i_rank);
492 
493  } else { // if n != 0 it is not the first dimension, so n is at least 1
494  int nmats = 1; // nmats is the number of submatrices to be multiplied
495  int ncols = 1; // ncols is the number of columns in a submatrix
496 
497  // calculate the number of columns in the sub-matrix of a matricized tensor
498  for (i = 0; i < n; i++) {
499  ncols *= T->dims[i];
500  }
501 
502  // calculate the number of row major sub-matrices in the matricized tensor
503  for (i = n + 1; i < T->nmodes; i++) {
504  nmats *= T->dims[i];
505  }
506 
507  // do nmats dgemm calls on chunks of our two matrices
508  for (i = 0; i < nmats; i++) {
509  if (i == 0)
510  beta = 0.0;
511  else
512  beta = 1.0;
513  alpha = 1.0;
514  nDim = T->dims[n];
515 
516  // printf("Input args to cblas_dgemm
517  // function\nnDim=%ld\nrank=%ld\ncols=%ld\nnmats=%ld\ni=%ld\n",nDim, rank,
518  // ncols, nmats, i);
519 
520  /*
521  This dgemm call is more complex
522  1) CblasRowMajor - treat the matrices as row major
523  2) CblasNoTrans - do not transpose the left matrix, the
524  current submatrix of the matricized tensor
525  3) CblasNoTrans - do not transpose the right matrix, the
526  current K submatrix
527  4) nDim - the number of rows in a submatrix
528  5) rank - the number of columns in K
529  6) ncols - the number of columns in a submatrix and rows in K
530  7) alpha
531  8) T->data + i*ncols*nDim - ncols*nDim is the size of
532  a submatrix; a submatrix is stored in contiguous memory, so
533  i*ncols*nDim indicates which submatrix we are on
534  9) ncols -
535  the distance between rows of a submatrix of the
536  matricized tensor
537  10) K + i*ncols*rank - starting point of the Khatri-Rao
538  submatrix; each submatrix spans ncols rows of K
539  11) rank - the distance between rows of
540  the K matrix
541  12) beta - 0.0 on the first call, 1.0 afterwards so the
542  results of the nmats multiplies accumulate into C
543  C) the output matrix, size nDim by rank
544  rank) the distance between rows of the C matrix
545  */
546  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nDim, rank, ncols,
547  alpha, T->data + i * nDim * ncols, ncols,
548  K + i * ncols * rank, rank, beta, C, rank);
549  // dgemm_(&nt, &nt, &nDim, &i_rank, &ncols, &alpha,
550  // T->data + i * nDim * ncols, &ncols, K + i * ncols * rank,
551  // &i_rank, &beta, C, &i_rank);
552  }
553  } // End of else
554 }
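For checking the dgemm path, a naive reference is useful. The sketch below (hypothetical helper, plain loops) computes the mode-0 MTTKRP of a 3-mode I x J x K tensor stored in the first-mode layout used here, T(i,j,k) = data[i + j*I + k*I*J], against row-major J x R and K x R factors B and C:

void naive_mttkrp_mode0(const double *T, const double *B, const double *C,
                        double *M, long int I, long int J, long int K,
                        long int R) {
  // M (I x R, row major) = X_(0) * (C kr B), the reverse Khatri-Rao order
  for (long int i = 0; i < I; i++)
    for (long int r = 0; r < R; r++) {
      double s = 0.0;
      for (long int k = 0; k < K; k++)
        for (long int j = 0; j < J; j++)
          s += T[i + j * I + k * I * J] * B[j * R + r] * C[k * R + r];
      M[i * R + r] = s;
    }
}

Its output should agree with MTTKRP_RowMajor(T, K, C, rank, 0) when the K argument holds the KRP produced by Multi_KR_RowMajor with n = 0.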
555 
556 /*
557  MHada_RowMajor
558  It computes the entry-wise (Hadamard) product of N-1 of the SYRK
559  matrices, skipping over the nth
560  matrix in the array of matrices that it is given.
561 */
562 void MHada_RowMajor(ktensor *Y, double **SYRKs, double *V, long int n) {
563  long int i, j, c;
564  j = 0;
565 
566  if (n ==
567  0) // if n==0 we do not want to use the 0th factor matrix so start at 1
568  i = 1;
569  else
570  i = 0;
571 
572  for (c = 0; c < Y->nmodes - 1;
573  c++, i++) { // c keeps track of the number of multiplies that should be
574  // performed
575  if (i == n) // skip over the nth factor matrix
576  i++;
577  /*
578  For the first round of multiplies we just need to copy the first
579  matrix into V; this avoids having to fill V with any initial values.
580  */
581 
582  if (j == 0) { // couldn't I just use c?
583  j += 1;
584  cblas_dcopy(Y->rank * Y->rank, SYRKs[i], 1, V, 1);
585  } else {
586  Upper_Hadamard_RowMajor(Y->rank, Y->rank, V, SYRKs[i], V);
587  }
588  } // end of for i
589 }
590 
591 void do_SYRKs_RowMajor(ktensor *Y, double **SYRKs) {
592  long int i;
593  double alpha, beta;
594  alpha = 1.0;
595  beta = 0.0;
596  for (i = 0; i < Y->nmodes; i++) {
597  cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, Y->rank, Y->dims[i],
598  alpha, Y->factors[i], Y->rank, beta, SYRKs[i], Y->rank);
599  }
600 }
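In matrix terms, a sketch of what the two routines above build together: dsyrk forms the rank x rank Gram matrix (upper triangle) of each factor, and MHada_RowMajor multiplies all but the nth of them entry-wise,

S_m = A_m^T * A_m,    V = S_0 .* S_1 .* ... .* S_{n-1} .* S_{n+1} .* ... .* S_{N-1},

which is the Gram matrix of the Khatri-Rao product of those factors, i.e. the normal-equations matrix a CP-ALS update solves against for factor n.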
601 
602 /*
603  normalize_Factor_Matrix_RowMajor();
604  Description --
605  Takes a ktensor and a long int n and normalizes the nth factor
606  matrix. This will overwrite the lambda values of the ktensor with the
607  column norms of the nth factor matrix.
608  Variables --
609  Y) the ktensor
610  n) the factor matrix to normalize
611  i) iterator over the rank of the nth factor matrix of Y
612  Notes: ask Grey about the 1.0/Y->lambdas[i] division; worry about divide by 0!
613 */
614 void normalize_Factor_Matrix_RowMajor(ktensor *Y, long int n) {
615  long int i;
616  for (i = 0; i < Y->rank; i++) {
617  Y->lambdas[i] = cblas_dnrm2(Y->dims[n], Y->factors[n] + i, Y->rank);
618  cblas_dscal(Y->dims[n], 1.0 / Y->lambdas[i], Y->factors[n] + i, Y->rank);
619  }
620 }
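The stride arithmetic is worth spelling out: in a row-major dims[n] x rank factor, column i starts at factors[n] + i and successive entries are rank apart, which is the (pointer, increment) pair handed to dnrm2 and dscal. A hypothetical fragment on a 3 x 2 matrix:

double A[6] = {3, 1, 0, 2, 4, 2};     // rows {3,1} {0,2} {4,2}
double n0 = cblas_dnrm2(3, A + 0, 2); // column 0 = {3,0,4}, norm 5
cblas_dscal(3, 1.0 / n0, A + 0, 2);   // column 0 becomes {0.6, 0, 0.8}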
621 
622 /*
623  normalize_Ktensor_RowMajor()
624  Takes in a ktensor Y whose factor matrices are stored in row major
625  ordering and normalizes them column-wise, saving the weights in the Y->lambdas
626  vector.
627 */
628 void normalize_Ktensor_RowMajor(ktensor *Y) {
629  long int i, j;
630  double l;
631 
632  for (i = 0; i < Y->nmodes; i++) { // loop over the factor matrices
633  for (j = 0; j < Y->rank;
634  j++) { // loop over the columns of the factor matrices
635  if (i == 0) {
636  Y->lambdas[j] = cblas_dnrm2(Y->dims[i], Y->factors[i] + j, Y->rank);
637  if (Y->lambdas[j] == 0.0) {
638  exit(-8);
639  }
640  cblas_dscal(Y->dims[i], 1.0 / Y->lambdas[j], Y->factors[i] + j,
641  Y->rank);
642  } else {
643  l = cblas_dnrm2(Y->dims[i], Y->factors[i] + j, Y->rank);
644  if (l == 0.0) {
645  exit(-8);
646  }
647  Y->lambdas[j] *= l;
648  cblas_dscal(Y->dims[i], 1.0 / l, Y->factors[i] + j, Y->rank);
649  }
650  }
651  }
652 }
653 
654 /*
655 TransposeM()
656  Description --
657  Transposes an input matrix A and stores it in A_T
658  Variables --
659  A) matrix to transpose
660  A_T) output matrix
661  rowsA) the number of rows in A and the number of columns in A_T
662  colsA) the number of columns in A and the number of rows in A_T
663  i) iterator
664  j) iterator
665 
666 Notes: Perhaps just store it in the old matrix?
667 */
668 void TransposeM(double *A, double *A_T, long int rowsA, long int colsA) {
669  long int i;
670  for (i = 0; i < rowsA; i++) {
671  /*
672  colsA) copy colsA items each time, one row's worth
673  A + i) source, starting at element i
674  rowsA) the distance between elements in a row of A
675  A_T + colsA * i) destination, row i of A_T
676  1) the distance between elements in a row of A_T
677  */
678  cblas_dcopy(colsA, A + i, rowsA, A_T + colsA * i, 1);
679  }
680 }
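A sketch of the access pattern (hypothetical fragment): each dcopy gathers one strided row of A and writes it contiguously into A_T.

// 2x3 A in column-major order: A(i,j) = A[i + 2*j], i.e. [[1,2,3],[4,5,6]]
double A[6] = {1, 4, 2, 5, 3, 6};
double A_T[6];
TransposeM(A, A_T, 2, 3);
// A_T = {1, 2, 3, 4, 5, 6}: the same matrix with rows now contiguous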
681 
682 /*
683 Full_nMode_Matricization_RowMajor();
684  Description -- creates the full nmode matricization of a tensor
685  from the factor matrices of a Ktensor. This assumes the inputs are in
686  RowMajor order and returns the matricization in ColumnMajor ordering:
687  F_n (KR_{!=n})^T
688 */
689 void Full_nMode_Matricization_RowMajor(tensor *T, ktensor *Y, long int n) {
690  double *KR, alpha, beta;
691 
692  KR = reinterpret_cast<double *>(
693  malloc(sizeof(double) * (Y->dims_product / Y->dims[n]) * Y->rank));
694 
695  Multi_KR_RowMajor(Y, KR, n);
696 
697  alpha = 1.0;
698  beta = 0.0;
699  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, Y->dims[n],
700  (Y->dims_product / Y->dims[n]), Y->rank, alpha, Y->factors[n],
701  Y->rank, KR, Y->rank, beta, T->data, Y->dims[n]);
702  // int i_m = Y->dims[n];
703  // int i_n = (Y->dims_product / Y->dims[n]);
704  // int i_k = Y->rank;
705  // char t = 'T';
706  // char nt = 'N';
707  // dgemm_(&t, &nt, &i_m, &i_n, &i_k, &alpha, Y->factors[n], &i_k, KR, &i_k,
708  // &beta, T->data, &i_m);
709 
710  free(KR);
711 }
712 
713 /*
714 approximation_Error()
715  Computes the norm of the difference between a tensor and its current
716  approximation, given a KRP and a factor matrix
717 */
718 double approximation_Error(double *X, double *KR, ktensor *Y, long int n) {
719  // Note: you probably only want to do this for n == 0, but you could
720  // easily do it for the last mode as well
721  double alpha, beta;
722  alpha = 1.0;
723  beta = -1.0;
724 
725  // now we have X = X-Y, where Y is the approximation of X
726  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, Y->dims[n],
727  (Y->dims_product / Y->dims[n]), Y->rank, alpha, Y->factors[n],
728  Y->rank, KR, Y->rank, beta, X, Y->dims[n]);
729 
730  return cblas_dnrm2(Y->dims_product, X, 1);
731 }
732 
743 double CP_ALS_efficient_error_computation(ktensor *Y, long int n,
744  double *MTTKRP, double *V, double *S,
745  double tensor_norm) {
746  double dotXY, dotYY, *lambda_array, e;
747  lambda_array = reinterpret_cast<double *>(malloc(sizeof(double) * Y->rank));
748 
749  dotXY = cblas_ddot(Y->dims[n] * Y->rank, Y->factors[n], 1, MTTKRP, 1);
750 
752 
753  cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, Y->rank, Y->dims[n], 1.0,
754  Y->factors[n], Y->rank, 0.0, S, Y->rank);
755 
756  Upper_Hadamard_RowMajor(Y->rank, Y->rank, V, S, V);
757 
758  cblas_dsymv(CblasRowMajor, CblasUpper, Y->rank, 1.0, V, Y->rank, Y->lambdas,
759  1, 0.0, lambda_array, 1);
760 
761  dotYY = cblas_ddot(Y->rank, lambda_array, 1, Y->lambdas, 1);
762 
763  e = ((tensor_norm * tensor_norm) - (2 * dotXY) + dotYY);
764 
765  free(lambda_array);
766 
767  return e;
768 }
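The identity being evaluated is the standard CP-ALS error expansion (a sketch; tensor_norm supplies ||X||, and the lambda weights are assumed staged consistently with the call site):

||X - Xhat||^2 = ||X||^2 - 2 <X, Xhat> + ||Xhat||^2
<X, Xhat>  = <MTTKRP_n, A_n>
||Xhat||^2 = lambda^T (S_0 .* S_1 .* ... .* S_{N-1}) lambda,  with S_m = A_m^T * A_m

so the full residual tensor never has to be formed, unlike in CP_ALS_naive_error_computation below.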
769 
775 double CP_ALS_naive_error_computation(tensor *T, ktensor *Y, double *X,
776  double *KRP, long int n) {
777  double e;
778 
779  cblas_dcopy(T->dims_product, T->data, 1, X, 1);
780  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, Y->dims[n],
781  (Y->dims_product / Y->dims[n]), Y->rank, 1.0, Y->factors[n],
782  Y->rank, KRP, Y->rank, -1.0, X, Y->dims_product / Y->dims[n]);
783  e = cblas_dnrm2(Y->dims_product, X, 1);
784 
785  return e;
786 }
787 
788 void dims_check(long int *dims, long int length) {
789  /*
790  This function takes an integer array of dimension and the array's
791  length. All entries in dims should be greater than 0 and length should be 1
792  or greater.
793  */
794 
795  long int i;
796 
797  if (length < 1) {
798  printf("From dims_check():: length < 1, Exit(-1)");
799  exit(-1);
800  }
801  if (dims == NULL) {
802  printf("From dims_check():: dims == NULL, Exit(-2)");
803  exit(-2);
804  }
805  for (i = 0; i < length; i++) {
806  if (dims[i] < 1) {
807  printf("From dims_check():: dims[%ld] < 1, Exit(-3)", i);
808  exit(-3);
809  }
810  }
811 }
812 
813 /*
814 reorder_Ktensor();
815  Reorders the factor matrices and dimensions of a ktensor by removing the
816  nth mode. Only shuffles pointers, no deep copies of factor matrices are
817  made.
818  1) Y the original ktensor
819  2) nY the ktensor without the nth mode
820  Example: [0,1,2,3,4], n = 3
821  [0,1,2,4]
822 */
823 void reorder_Ktensor(ktensor *Y, ktensor *nY, long int n) {
824  long int i, j;
825  nY->nmodes = Y->nmodes - 1;
826  nY->factors =
827  reinterpret_cast<double **>(malloc(sizeof(double *) * nY->nmodes));
828  nY->dims = (long int *)malloc(sizeof(long int) * nY->nmodes);
829  nY->lambdas = reinterpret_cast<double *>(malloc(sizeof(double) * Y->rank));
830  nY->rank = Y->rank;
831  nY->dims_product = Y->dims_product / Y->dims[n];
832 
833  for (i = 0, j = 0; i < Y->nmodes; i++) {
834  if (i != n) {
835  nY->factors[j] = Y->factors[i];
836  nY->dims[j] = Y->dims[i];
837  j++;
838  }
839  }
840 }
841 /*
842 void compute_KRP_Indices();
843  Computes the factor matrix indices for a given row in a Khatri-Rao
844  product. The indices are in reverse order so as to be in keeping with the
845  reverse order Khatri-Rao product functions used here.
846 */
847 void compute_KRP_Indices(long int j, ktensor *Y, long int *indeces) {
848  long int i, p;
849  p = Y->dims_product;
850 
851  for (i = Y->nmodes - 1; i >= 0; i--) {
852  p = p / Y->dims[i];
853 
854  if (p != 0) indeces[i] = j / p;
855  j -= indeces[i] * p;
856  }
857  // printf("nmodes = %ld\n",Y->nmodes);
858  // printf("Indeces = ");
859  // for( i = 0; i < Y->nmodes; i++){
860  // printf("%ld,",indeces[i]);
861  //}
862  // printf("\n");
863  // Y->nmodes = Y->nmodes + 1;
864 }
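A worked example of the mixed-radix decoding (mode 0 varies fastest, matching the reverse KRP row order): for dims = [2, 3, 4] and row j = 17,

i = 2: p = 24/4 = 6, indeces[2] = 17/6 = 2, j = 17 - 2*6 = 5
i = 1: p = 6/3 = 2,  indeces[1] = 5/2 = 2,  j = 5 - 2*2 = 1
i = 0: p = 2/2 = 1,  indeces[0] = 1/1 = 1,  j = 1 - 1*1 = 0

Check: 1 + 2*2 + 2*(2*3) = 17.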
865 
872 void wrapper_Parallel_Multi_revKRP(ktensor *Y, long int num_threads,
873  double *KRP) {
874  long int useful_num_threads =
875  num_threads; // the number of threads that should be used, <= num_threads
876 
877  if (Y->dims_product < num_threads) useful_num_threads = Y->dims_product;
878 
879 #pragma omp parallel num_threads(useful_num_threads)
880  {
881  long int thread_id = omp_get_thread_num();
882  long int start, end;
883 
884  compute_Iteration_Space(thread_id, useful_num_threads, Y->dims_product,
885  &start, &end);
886  parallel_Multi_KR_RowMajor(Y, &KRP[start * Y->rank], start, end);
887  }
888 }
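A hypothetical call site for the wrapper (a sketch; assumes Y points to a fully populated ktensor):

double *KRP = reinterpret_cast<double *>(
    malloc(sizeof(double) * Y->dims_product * Y->rank));
wrapper_Parallel_Multi_revKRP(Y, 8, KRP); // 8 threads; KRP is dims_product x rank
free(KRP);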
889 
890 /*
891 Gen_Tensor();
892  This function generates a Ktensor using uniformly random numbers. Given
893  a rank, a number of dimensions, and the sizes of those dimensions, random
894  factor matrices are generated and then a full tensor is created from those.
895  All memory for the objects is allocated inside of the function.
896  Call clean_Up_Gen_Tensor() to free all the memory when finished.
897 */
898 
899 void Gen_Tensor(ktensor *Y, tensor *T, long int R, long int N, long int *D,
900  double noise) {
901  /*
902  A function for returning a randomly generated Rank R and N mode tensor
903  this function will allocate all of the memory needed for the tensor
904  just passing the declared ktensor and tensor objects.
905  1) Y, a ktensor
906  2) T, a tensor
907  3) R, the rank of the tensor
908  4) N, the number of modes in the tensor
909  5) D, the dimensions of the tensor, D is length N
910  */
911 
912  long int i, j;
913  double alpha, tensor_norm;
914 
915  srand(time(NULL));
916 
917  // Set up the ktensor Y
918  Y->nmodes = N;
919  Y->rank = R;
920  Y->dims = (long int *)malloc(sizeof(long int) * Y->nmodes);
921  Y->factors =
922  reinterpret_cast<double **>(malloc(sizeof(double *) * Y->nmodes));
923  for (i = 0; i < Y->nmodes; i++) Y->dims[i] = D[i];
924 
925  Y->dims_product = 1;
926  for (i = 0; i < Y->nmodes; i++) {
927  Y->factors[i] = reinterpret_cast<double *>(
928  malloc(sizeof(double) * Y->rank * Y->dims[i]));
929  Y->dims_product *= Y->dims[i];
930  for (j = 0; j < Y->rank * Y->dims[i]; j++) {
931  Y->factors[i][j] =
932  (static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * 2 - 1;
933  }
934  }
935  Y->lambdas = reinterpret_cast<double *>(malloc(sizeof(double) * Y->rank));
936  memset(Y->lambdas, 0, sizeof(double) * Y->rank);
937 
938  // Set up the tensor T
939  T->nmodes = N;
940  T->dims = (long int *)malloc(sizeof(long int) * Y->nmodes);
941  T->data =
942  reinterpret_cast<double *>(malloc(sizeof(double) * Y->dims_product));
943  T->dims_product = Y->dims_product;
944 
945  for (i = 0; i < T->nmodes; i++) T->dims[i] = D[i];
946 
948 
949  tensor_norm = cblas_dnrm2(Y->dims_product, T->data, 1);
950 
951  if (noise > 0.0) {
952  alpha = (noise * tensor_norm) / sqrt(Y->dims_product);
953 
954  // add alpha * (a normally distributed random number) to T->data
955  double U1, U2, Z1, Z2;
956  for (i = 0; i < Y->dims_product - 1; i += 2) {
957  // Generate 2 uniformly random numbers in range 0 to 1
958  U1 = (static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
959  U2 = (static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
960 
961  // Transform to 2 standard normally distributed numbers (Box-Muller)
962  Z1 = sqrt(-2.0 * log(U1)) * cos(2 * M_PI * U2);
963  Z2 = sqrt(-2.0 * log(U1)) * sin(2 * M_PI * U2);
964 
965  // Scale by 2 and shift by -1
966  Z1 = (Z1 * 2.0) - 1.0;
967  Z2 = (Z2 * 2.0) - 1.0;
968 
969  T->data[i] += alpha * Z1;
970  T->data[i + 1] += alpha * Z2;
971  }
972  if ((Y->dims_product % 2) != 0) {
973  // do the last element, which the pairwise loop above missed
974  // Generate 2 uniformly random numbers in range 0 to 1
975  U1 = (static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
976  U2 = (static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
977 
978  // Transform to a standard normally distributed number (Box-Muller)
979  Z1 = sqrt(-2.0 * log(U1)) * cos(2 * M_PI * U2);
980 
981  // Scale by 2 and shift by -1
982  Z1 = (Z1 * 2.0) - 1.0;
983 
984  T->data[Y->dims_product - 1] += alpha * Z1;
985  }
986  }
987 }
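The noise step uses the Box-Muller transform; for reference, the underlying identities are

Z1 = sqrt(-2 ln U1) cos(2 pi U2),  Z2 = sqrt(-2 ln U1) sin(2 pi U2),  U1, U2 ~ Uniform(0, 1],

which yield independent standard normal variates. Note that the extra (Z * 2) - 1 rescaling above is this code's own adjustment: it shifts the mean to -1 and doubles the standard deviation rather than bounding the range.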
988 
989 /*
990 parallel_Multi_KR_RowMajor()
991 Computes a RowMajor KRP limited by start and end, which are row indices
992 */
993 void parallel_Multi_KR_RowMajor(ktensor *Y, double *C, long int start,
994  long int end) {
995  if (Y->nmodes == 2) {
996  /*
997  If there are only 2 factor matrices, then no partials are used;
998  call this function instead
999  */
1000  parallel_KR_RowMajor(Y, C, start, end);
1001  } else {
1002  long int i, j, *indexers, nmodes;
1003  double *partial_Hadamards;
1004 
1005  nmodes = Y->nmodes;
1006 
1007  indexers = (long int *)malloc(sizeof(long int) *
1008  nmodes); // an indexer for each mode
1009  partial_Hadamards = reinterpret_cast<double *>(
1010  malloc(sizeof(double) * Y->rank *
1011  (nmodes - 2))); // N-2 partial vectors, for storing
1012  // intermediate Hadamard products
1013 
1014  memset(indexers, 0, sizeof(long int) * (nmodes));
1015 
1016  // Compute the starting indices
1017  compute_KRP_Indices(start, Y, indexers);
1018 
1019  // Initialize the partial_Hadamards to the starting values, i indexes the
1020  // partial_Hadamards Hadamard the last two factor matrices and stores them
1021  // in the last partial hadamard
1022  vdMul(Y->rank, &Y->factors[nmodes - 2][Y->rank * indexers[nmodes - 2]],
1023  &Y->factors[nmodes - 1][Y->rank * indexers[nmodes - 1]],
1024  &partial_Hadamards[Y->rank * (nmodes - 3)]);
1025 
1026  // For each other partial Hadamard multiply the previous partial Hadamard
1027  // with a row of a factor matrix
1028  for (i = nmodes - 4; i >= 0; i--)
1029  vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1030  &partial_Hadamards[Y->rank * (i + 1)],
1031  &partial_Hadamards[Y->rank * i]);
1032 
1033  // Iterate over the rows of the KRP matrix
1034  for (i = start; i < end; i++) {
1035  /*
1036  compute the current row using vdMul: multiply the first partial
1037  Hadamard with a row of the first factor matrix 1) length of a row 2) always
1038  some row of the 0th factor matrix 3) always the first partial_Hadamard
1039  4) the current row of the KR matrix
1040 
1041  */
1042  vdMul(Y->rank, &Y->factors[0][Y->rank * indexers[0]],
1043  &partial_Hadamards[0], &C[Y->rank * (i - start)]);
1044 
1045  // update the indexers array
1046  for (j = 0; j < nmodes; j++) {
1047  indexers[j] += 1;
1048  if (indexers[j] >= Y->dims[j]) {
1049  indexers[j] = 0;
1050  } else {
1051  break;
1052  }
1053  }
1054 
1055  parallel_update_Partial_Hadamards(Y, indexers, partial_Hadamards);
1056  }
1057  free(indexers);
1058  free(partial_Hadamards);
1059  }
1060 }
1061 
1066 void parallel_KR_RowMajor(ktensor *Y, double *C, long int start, long int end) {
1067  long int i, j, *indexers, c;
1068 
1069  indexers = (long int *)malloc(sizeof(long int) * Y->nmodes);
1070  memset(indexers, 0, sizeof(long int) * (Y->nmodes));
1071 
1072  compute_KRP_Indices(start, Y, indexers);
1073 
1074  c = start;
1075  i = indexers[1];
1076  j = indexers[0];
1077  for (; (c < end) && (i < Y->dims[1]); i++) {
1078  if (c != start) j = 0;
1079  for (; (c < end) && (j < Y->dims[0]); j++) {
1080  /*
1081  vdMul call
1082  1) number of elements to multiply together
1083  2) pointer to first vector
1084  3) pointer to second vector
1085  4) pointer to the output location:
1086  row (c - start) of C, where each row
1087  is Y->rank doubles long
1088  */
1089  vdMul(Y->rank, &Y->factors[1][i * Y->rank], &Y->factors[0][j * Y->rank],
1090  &C[(c - start) * Y->rank]);
1091  c++;
1092  }
1093  }
1094  free(indexers);
1095 }
1096 
1097 void parallel_update_Partial_Hadamards(ktensor *Y, long int *indexers,
1098  double *partial_Hadamards) {
1099  long int update_from, i;
1100  long int nmodes = Y->nmodes;
1101 
1102  for (i = 0; i < nmodes; i++) {
1103  update_from = i;
1104  if (indexers[i] != 0) break;
1105  }
1106 
1107  // if update_from is 0 we do not have to update any partials
1108  if (update_from == 0) {
1109  return;
1110  } else if (update_from >= nmodes - 2) {
1111  /*
1112  If update_from is the second to last index or the last index,
1113  we have to update all of the partials, including combining the last
1114  two factor matrices separately
1115  */
1116  /*
1117  call vdMul to get the Hadamard product of the last two factor
1118  matrices 1) length of the vectors 2) second to last factor matrix 3) last
1119  factor matrix 4) output location in partial_Hadamards
1120  */
1121  vdMul(Y->rank, &Y->factors[nmodes - 2][Y->rank * indexers[nmodes - 2]],
1122  &Y->factors[nmodes - 1][Y->rank * indexers[nmodes - 1]],
1123  &partial_Hadamards[Y->rank * (nmodes - 3)]);
1124  // Update the rest of the partial Hadamards
1125  for (i = nmodes - 4; i >= 0; i--) { // i iterates over the partials array
1126  /*
1127  call vdMul to get the Hadamard product of the current factor
1128  matrix row and the partial_Hadamard one stride to the right.
1129  Note: the factor whose row we want is i+1
1130  1) the length of the vectors
1131  2) the (i+1)th factor matrix's row
1132  3) the partial_Hadamard one stride to the right
1133  4) output location in partial_Hadamards
1134  */
1135  vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1136  &partial_Hadamards[Y->rank * (i + 1)],
1137  &partial_Hadamards[Y->rank * i]);
1138  }
1139  } else { // the case that update_from < nmodes - 2: not the last or
1140  // second-to-last index
1141  for (i = update_from - 1; i >= 0; i--) {
1142  /*
1143  call vdMul to update the partial_Hadamards to the left of and
1144  including i
1145  */
1146  vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1147  &partial_Hadamards[Y->rank * (i + 1)],
1148  &partial_Hadamards[Y->rank * i]);
1149  }
1150  }
1151 }
1152 
1153 /*
1154  compute_Iteration_Space()
1155  Computes the iteration space a given thread should work on.
1156  1) thread_id
1157  2) total number of threads
1158  3) total iteration space to distribute
1159  4) start of the thread's space
1160  5) end of the thread's space
1161 */
1162 void compute_Iteration_Space(long int thread_id, long int num_threads,
1163  long int iter_space, long int *start,
1164  long int *end) {
1165  /*
1166  1) b, block size
1167  2) limit, bound on thread_ids who get +1 on block size
1168  */
1169  long int b, limit;
1170 
1171  limit = iter_space % num_threads;
1172  b = iter_space / num_threads;
1173 
1174  if (thread_id < limit) {
1175  *start = thread_id * (b + 1);
1176  *end = *start + (b + 1);
1177  } else {
1178  *start = (limit * (b + 1)) + ((thread_id - limit) * b);
1179  *end = *start + b;
1180  }
1181 }
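A worked example of the split: iter_space = 10 over num_threads = 3 gives b = 3 and limit = 1, so thread 0 takes [0, 4) while threads 1 and 2 take [4, 7) and [7, 10). A hypothetical fragment:

long int s, e;
compute_Iteration_Space(1, 3, 10, &s, &e); // s == 4, e == 7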
1182 
1183 /*
1184  Function for freeing the memory of ktensor and tensor objects
1185 */
1186 void clean_Up_Gen_Tensor(ktensor *Y, tensor *T) {
1187  long int i;
1188 
1189  // Ktensor Y memory
1190  for (i = 0; i < Y->nmodes; i++) {
1191  free(Y->factors[i]);
1192  }
1193  free(Y->factors);
1194  free(Y->dims);
1195  free(Y->lambdas);
1196 
1197  // Tensor T memory
1198  free(T->data);
1199  free(T->dims);
1200 }
1201 
1202 /*
1203  1) rank
1204  2) num_threads
1205  3) max_iters
1206  4) tolerance
1207  5) nmodes, then the array of dimensions
1208 */
1209 void process_inputs(long int argc, char *argv[], tensor_inputs *inputs) {
1210  long int i, j = 1;
1211  long int offset; // 0)program_name 1)rank, 2)num_threads, 3)max_iters,
1212  // 4)tolerance, 5)nmodes
1213 
1214  inputs->rank = atoi(argv[j++]); // 1
1215  // inputs->noise = atof(argv[2]);
1216  inputs->num_threads = atoi(argv[j++]); // 2
1217  inputs->max_iters = atoi(argv[j++]); // 3
1218  inputs->tolerance = atof(argv[j++]); // 4
1219  inputs->nmodes = atoi(argv[j++]); // 5
1220 
1221  offset = j;
1222 
1223  printf("N=%ld\n", inputs->nmodes);
1224  inputs->dims = (long int *)malloc(sizeof(long int) * inputs->nmodes);
1225  for (i = 0; i < inputs->nmodes; i++) inputs->dims[i] = atoi(argv[offset + i]);
1226 
1227  if (inputs->num_threads < 1) {
1228  printf(
1229  "From process_inputs(), num_threads = %ld, num_threads must be at least "
1230  "1\nExit\n",
1231  inputs->num_threads);
1232  exit(-1);
1233  }
1234  if (inputs->rank < 1) {
1235  printf("From process_inputs(), rank = %ld, rank must be at least 1\nExit\n",
1236  inputs->rank);
1237  exit(-1);
1238  }
1239  dims_check(inputs->dims, inputs->nmodes);
1240 
1241  printf("****************************************\n");
1242  printf("Processed inputs:\n");
1243  printf("inputs->rank = %ld\n", inputs->rank);
1244  printf("inputs->num_threads = %ld\n", inputs->num_threads);
1245  printf("inputs->max_iters = %ld\n", inputs->max_iters);
1246  printf("inputs->tolerance = %.20lf\n", inputs->tolerance);
1247  printf("inputs->nmodes = %ld\n", inputs->nmodes);
1248  printf("inputs->dims[%ld", inputs->dims[0]);
1249  for (i = 1; i < inputs->nmodes; i++) {
1250  printf(",%ld", inputs->dims[i]);
1251  }
1252  printf("]\n");
1253  printf("****************************************\n");
1254 }
1255 
1256 void destroy_inputs(tensor_inputs *inputs) { free(inputs->dims); }
1257 
1258 /*
1259  LR_Ktensor_Reordering_newY()
1260  This function is for reordering the factor matrices of a given Ktensor.
1261  It takes the left or right factor matrices in relation to some index n.
1262  Mallocs memory that needs to be freed later on.
1263 */
1264 void LR_Ktensor_Reordering_newY(ktensor *Y, ktensor *nY, long int n,
1265  direction D) {
1266  long int i, jump;
1267 
1268  if (D == ::direction::left) {
1269  nY->nmodes = n;
1270  jump = 0;
1271  } else {
1272  nY->nmodes = Y->nmodes - (n + 1);
1273  jump = n + 1;
1274  }
1275 
1276  nY->factors =
1277  reinterpret_cast<double **>(malloc(sizeof(double *) * nY->nmodes));
1278  nY->dims = (long int *)malloc(sizeof(long int) * nY->nmodes);
1279  nY->rank = Y->rank;
1280  nY->lambdas = reinterpret_cast<double *>(malloc(sizeof(double) * nY->rank));
1281 
1282  for (i = 0; i < nY->nmodes; i++) {
1283  nY->factors[i] = Y->factors[i + jump];
1284  nY->dims[i] = Y->dims[i + jump];
1285  }
1286  for (i = 0; i < nY->rank; i++) nY->lambdas[i] = Y->lambdas[i];
1287  for (i = 0, nY->dims_product = 1; i < nY->nmodes; i++)
1288  nY->dims_product *= nY->dims[i];
1289 }
1290 
1291 /*
1292  LR_Ktensor_Reordering_existingY()
1293  This function is for reordering the factor matrices of a given Ktensor.
1294  It takes the left or right factor matrices in relation to some index.
1295  Does not allocate any new memory.
1296 */
1297 void LR_Ktensor_Reordering_existingY(ktensor *Y, ktensor *nY, long int n,
1298  direction D) {
1299  long int i, jump;
1300 
1301  if (D == ::direction::left) {
1302  nY->nmodes = n;
1303  jump = 0;
1304  } else {
1305  nY->nmodes = Y->nmodes - (n + 1);
1306  jump = n + 1;
1307  }
1308 
1309  nY->rank = Y->rank;
1310  nY->dims_product = 1;
1311  for (i = 0; i < nY->nmodes; i++) {
1312  nY->factors[i] = Y->factors[i + jump];
1313  nY->dims[i] = Y->dims[i + jump];
1314  nY->dims_product *= nY->dims[i];
1315  }
1316  for (i = 0; i < nY->rank; i++) nY->lambdas[i] = Y->lambdas[i];
1317 }
1318 
1319 /*
1320  Removes a mode from an existing ktensor.
1321  Does NOT reset the memory.
1322 */
1323 void remove_mode_Ktensor(ktensor *Y, long int n) {
1324  long int i, j;
1325 
1326  if (n < 0 || n >= Y->nmodes) {
1327  printf("In remove_mode_Ktensor() invalid value of n == %ld\nExit\n", n);
1328  exit(-1);
1329  }
1330 
1331  Y->nmodes -= 1;
1332  for (i = 0, j = 0; j < Y->nmodes + 1; j++) { // j walks the original modes
1333  if (j == n) {
1334  // do nothing
1335  } else {
1336  Y->dims[i] = Y->dims[j];
1337  Y->factors[i] = Y->factors[j];
1338  i++;
1339  }
1340  }
1341 }
1342 
1343 /*
1344  Destroys a given ktensor object
1345  clear_factors is a flag that when == 1 will tell the function to free
1346  the factor matrices also.
1347 */
1348 void destruct_Ktensor(ktensor *Y, long int clear_factors) {
1349  long int i;
1350 
1351  if ((clear_factors != 0) && (clear_factors != 1)) {
1352  printf(
1353  "clear_factors argument in destruct_Ktensor() was not 0 or 1. Return. "
1354  "%ld\n",
1355  clear_factors);
1356  return;
1357  }
1358 
1359  if (clear_factors == 1) {
1360  for (i = 0; i < Y->nmodes; i++) free(Y->factors[i]);
1361  } else {
1362  for (i = 0; i < Y->nmodes; i++) Y->factors[i] = NULL;
1363  }
1364 
1365  free(Y->factors);
1366  free(Y->dims);
1367  free(Y->lambdas);
1368 }
1369 
1370 void destruct_Tensor(tensor *T) {
1371  free(T->data);
1372  free(T->dims);
1373 }
1374 
1375 /*
1376  swaps the data pointers of tensors t1 and t2
1377 */
1378 void tensor_data_swap(tensor *t1, tensor *t2) {
1379  double *M;
1380  M = t1->data;
1381  t1->data = t2->data;
1382  t2->data = M;
1383 }
1384 
1385 /*
1386  LR_tensor_Reduction(): reduces T's mode metadata (dims, nmodes, dims_product) to the modes left or right of mode n; does not touch T->data.
1387 */
1388 void LR_tensor_Reduction(tensor *T, tensor *nT, long int n, direction D) {
1389  long int i, jump;
1390 
1391  if (D == ::direction::left) {
1392  jump = 0;
1393  nT->nmodes = n;
1394  } else { // D == ::direction::right
1395  jump = n + 1;
1396  nT->nmodes = T->nmodes - jump;
1397  }
1398 
1399  // nT->data, this function does not deal with the data attribute
1400  // use void tensor_data_swap to affect data
1401 
1402  nT->dims_product = 1;
1403  for (i = 0; i < nT->nmodes; i++) {
1404  nT->dims[i] = T->dims[i + jump];
1405  nT->dims_product *= nT->dims[i];
1406  }
1407 }
1408 
1409 /*
1410  Creates a copy of a ktensor object and stores it in the given address.
1411  Factor matrix pointers are shared, not deep copied.
1412 */
1413 void ktensor_copy_constructor(ktensor *Y, ktensor *nY) {
1414  long int i;
1415 
1416  nY->nmodes = Y->nmodes;
1417  nY->rank = Y->rank;
1418 
1419  nY->dims = (long int *)malloc(sizeof(long int) * nY->nmodes);
1420  nY->factors =
1421  reinterpret_cast<double **>(malloc(sizeof(double *) * nY->nmodes));
1422  nY->lambdas = reinterpret_cast<double *>(malloc(sizeof(double) * nY->rank));
1423 
1424  nY->dims_product = 1;
1425  for (i = 0; i < nY->nmodes; i++) {
1426  nY->dims[i] = Y->dims[i];
1427  nY->factors[i] = Y->factors[i];
1428  nY->dims_product *= nY->dims[i];
1429  }
1430 
1431  cblas_dcopy(nY->rank, Y->lambdas, 1, nY->lambdas, 1);
1432 }
1433 
1434 direction opposite_direction(direction D) {
1435  if (D != ::direction::left && D != ::direction::right) {
1436  printf("In opposite_direction input D was invalid!\nExit\n");
1437  exit(-1);
1438  }
1439 
1440  if (D == ::direction::left) {
1441  return ::direction::right;
1442  } else {
1443  return ::direction::left;
1444  }
1445 }
1446 
1447 #endif // DIMTREE_DDTTENSOR_HPP_