3 #ifndef DIMTREE_DDTTENSOR_HPP_ 4 #define DIMTREE_DDTTENSOR_HPP_ 6 #include "dimtree/ddttensor.h" 12 void vdMul(
long int n,
double *a,
double *b,
double *c) {
13 for (
long int i = 0; i < n; i++) c[i] = a[i] * b[i];
24 for (j = 0; j < num_rows; j++) {
25 for (i = 0; i < num_cols; i++) {
27 printf(
",%0.10lf", M[i * num_rows + j]);
29 printf(
",%0.10lf", M[i * num_rows + j]);
44 for (j = 0; j < num_rows; j++) {
45 for (i = 0; i < num_cols; i++) {
47 printf(
",%0.10lf", M[j * num_cols + i]);
49 printf(
",%0.10lf", M[j * num_cols + i]);
59 printf(
"=========Ktensor========\n");
60 printf(
"Rank = %ld\n", Y->rank);
61 printf(
"Nmodes = %ld\n", Y->nmodes);
63 for (i = 0; i < Y->nmodes; i++) {
64 if (i != Y->nmodes - 1)
65 printf(
"%ld,", Y->dims[i]);
67 printf(
"%ld]\n", Y->dims[i]);
69 printf(
"Num_Eles = %ld\n", Y->dims_product);
70 printf(
"Lambdas = [");
71 for (i = 0; i < Y->rank; i++) {
73 printf(
"%.5lf,", Y->lambdas[i]);
75 printf(
"%.5lf]\n", Y->lambdas[i]);
77 for (i = 0; i < Y->nmodes; i++) {
78 printf(
"Factor[%ld]", i);
81 printf(
"========================\n");
91 printf(
"==========Tensor========\n");
92 printf(
"T->nmodes = %ld\n", T->nmodes);
93 printf(
"T->dims_product = %ld\n", T->dims_product);
95 for (i = 0; i < T->nmodes; i++) {
96 printf(
"%ld,", T->dims[i]);
100 if (show_data == 1) {
104 printf(
"========================\n");
111 CBLAS_TRANSPOSE transB,
long int m,
long int n,
112 long int k,
double alpha,
long int strideA,
113 long int strideB,
double beta,
long int strideC) {
114 printf(
"-------------------------------\n");
115 printf(
"dgemm_inputs :: \n");
117 if (dgemm_layout == CblasColMajor) {
118 printf(
"ColMajor\n");
120 printf(
"RowMajor\n");
123 if (transA == CblasNoTrans) {
129 if (transB == CblasNoTrans) {
135 printf(
"m = %ld\n", m);
136 printf(
"n = %ld\n", n);
137 printf(
"k = %ld\n", k);
138 printf(
"alpha = %lf\n", alpha);
139 printf(
"strideA = %ld\n", strideA);
140 printf(
"strideB = %ld\n", strideB);
141 printf(
"beta = %lf\n", beta);
142 printf(
"strideC = %ld\n", strideC);
143 printf(
"-------------------------------\n");
150 long int m,
long int n,
double alpha,
long int T_offset,
151 long int tensor_stride,
long int A_offset,
152 long int A_stride,
double beta,
153 long int output_col_stride,
long int output_stride) {
154 printf(
"-------------------------------\n");
155 printf(
"dgemv_inputs :: \n");
157 if (dgemv_layout == CblasColMajor) {
158 printf(
"ColMajor\n");
160 printf(
"RowMajor\n");
163 if (transA == CblasNoTrans) {
168 printf(
"m = %ld\n", m);
169 printf(
"n = %ld\n", n);
170 printf(
"alpha = %lf\n", alpha);
171 printf(
"T_offset = %ld\n", T_offset);
172 printf(
"tensor_stride= %ld\n", tensor_stride);
173 printf(
"A_offset = %ld\n", A_offset);
174 printf(
"A_stride = %ld\n", A_stride);
175 printf(
"beta = %lf\n", beta);
176 printf(
"output_col_stride = %ld\n", output_col_stride);
177 printf(
"output_stride = %ld\n", output_stride);
178 printf(
"-------------------------------\n");
190 long int *reordered_Dims,
long int n) {
192 for (i = 0, j = 0; i < Y->nmodes; i++) {
194 reordered_Factors[j] = Y->factors[i];
195 reordered_Dims[j] = Y->dims[i];
205 double *partial_Hadamards,
206 double **reordered_Factors) {
208 long int update_from, i;
218 for (i = 0; i < Y->nmodes - 1; i++) {
220 if (indexers[i] != 0)
break;
224 if (update_from == 0) {
226 }
else if (update_from >= Y->nmodes - 3) {
231 for (i = Y->nmodes - 4; i >= 0;
235 if (i == Y->nmodes - 4) {
243 &reordered_Factors[Y->nmodes - 3]
244 [Y->rank * indexers[Y->nmodes - 3]],
245 &reordered_Factors[Y->nmodes - 2]
246 [Y->rank * indexers[Y->nmodes - 2]],
247 &partial_Hadamards[Y->rank * i]);
259 vdMul(Y->rank, &reordered_Factors[i + 1][Y->rank * indexers[i + 1]],
260 &partial_Hadamards[Y->rank * (i + 1)],
261 &partial_Hadamards[Y->rank * i]);
266 for (i = update_from - 1; i >= 0; i--) {
271 vdMul(Y->rank, &reordered_Factors[i + 1][Y->rank * indexers[i + 1]],
272 &partial_Hadamards[Y->rank * (i + 1)],
273 &partial_Hadamards[Y->rank * i]);
279 long int i, j, lF, rF;
280 if (n != Y->nmodes - 1)
287 for (i = 0; i < Y->dims[lF]; i++) {
288 for (j = 0; j < Y->dims[rF];
290 vdMul(Y->rank, &Y->factors[lF][i * Y->rank], &Y->factors[rF][j * Y->rank],
291 &C[i * Y->rank * Y->dims[rF] + j * Y->rank]);
304 if (Y->nmodes == 3) {
307 long int i, j, *indexers, *reordered_Dims;
308 double *partial_Hadamards, **reordered_Factors;
311 reinterpret_cast<double **
>(malloc(
sizeof(
double *) * Y->nmodes - 1));
312 reordered_Dims = (
long int *)malloc(
sizeof(
long int) * Y->nmodes - 1);
313 indexers = (
long int *)malloc(
sizeof(
long int) * Y->nmodes -
315 partial_Hadamards =
reinterpret_cast<double *
>(
316 malloc(
sizeof(
double) * Y->rank * (Y->nmodes) -
322 memset(indexers, 0,
sizeof(
long int) * (Y->nmodes - 1));
326 for (i = Y->nmodes - 4; i >= 0; i--) {
327 if (i == Y->nmodes - 4) {
328 vdMul(Y->rank, &reordered_Factors[Y->nmodes - 3][0],
329 &reordered_Factors[Y->nmodes - 2][0],
330 &partial_Hadamards[Y->rank * i]);
332 vdMul(Y->rank, &reordered_Factors[i + 1][0],
333 &partial_Hadamards[Y->rank * (i + 1)],
334 &partial_Hadamards[Y->rank * i]);
339 for (i = 0; i < Y->dims_product / Y->dims[n]; i++) {
347 vdMul(Y->rank, &reordered_Factors[0][Y->rank * indexers[0]],
348 &partial_Hadamards[0], &C[Y->rank * i]);
351 for (j = 0; j < Y->nmodes - 1; j++) {
353 if (indexers[j] >= reordered_Dims[j]) {
365 free(partial_Hadamards);
366 free(reordered_Dims);
367 free(reordered_Factors);
// Upper_Hadamard_RowMajor (header truncated in this view; full signature per
// the file index: (long int nRows, long int nCols, double *A, double *B,
// double *C)). Element-wise product restricted to the upper triangle
// (j >= i) of row-major nRows x nCols matrices; entries strictly below the
// diagonal of C are left untouched — presumably because callers only read
// the upper triangle (e.g. after cblas_dsyrk with CblasUpper). TODO confirm.
373 double *B,
double *C) {
376 for (i = 0; i < nRows; i++) {
377 for (j = i; j < nCols; j++) {
// C(i,j) = A(i,j) * B(i,j), row-major linear index i*nCols + j.
378 C[i * nCols + j] = A[i * nCols + j] * B[i * nCols + j];
/**
 * Compares two nRows x nCols matrices element-wise.
 *
 * @param A      first matrix (dense, nRows*nCols elements)
 * @param B      second matrix (dense, nRows*nCols elements)
 * @param nRows  number of rows
 * @param nCols  number of columns
 * @param eps    comparison tolerance; pass -1.0 to use double machine epsilon
 * @return 1 if every |A[i]-B[i]| <= tolerance, 0 otherwise
 *
 * Side effect: prints the largest difference found (or the tolerance itself
 * when the matrices match within it).
 */
long int CompareM(double *A, double *B, long int nRows, long int nCols,
                  double eps) {
  // eps == -1.0 is the sentinel for "use the default tolerance" (DBL_EPSILON).
  if (eps == -1.0) eps = 2.220446049250313E-16;

  double max_dif = eps;
  long int are_same = 1;
  for (long int i = 0; i < nRows * nCols; i++) {
    if (fabs(A[i] - B[i]) > max_dif) {
      // Bug fix: record the ABSOLUTE difference. The original stored the
      // signed difference, so a large negative A[i]-B[i] lowered the running
      // threshold and made the printed Max_dif wrong (possibly negative).
      max_dif = fabs(A[i] - B[i]);
      are_same = 0;
    }
  }
  printf("Max_dif=%.20lf\n", max_dif);
  return are_same;
}
416 printf(
"Tensor pointer is null, error\n");
420 printf(
"Khatri Rhao product pointer is null, error\n");
424 printf(
"Output Matrix is null!\n");
428 printf(
"Rank must be greater than 1!\n");
431 if (n > T->nmodes - 1) {
432 printf(
"n is larger than the number of modes in the tensor, T->nmodes!\n");
436 printf(
"n cannot be a negative number!\n");
454 ncols = T->dims_product / T->dims[n];
487 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, rank, nDim, ncols,
488 alpha, K, rank, T->data, nDim, beta, C, rank);
498 for (i = 0; i < n; i++) {
503 for (i = n + 1; i < T->nmodes; i++) {
508 for (i = 0; i < nmats; i++) {
546 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nDim, rank, ncols,
547 alpha, T->data + i * nDim * ncols, ncols,
548 K + i * ncols * rank, rank, beta, C, rank);
572 for (c = 0; c < Y->nmodes - 1;
584 cblas_dcopy(Y->rank * Y->rank, SYRKs[i], 1, V, 1);
596 for (i = 0; i < Y->nmodes; i++) {
597 cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, Y->rank, Y->dims[i],
598 alpha, Y->factors[i], Y->rank, beta, SYRKs[i], Y->rank);
616 for (i = 0; i < Y->rank; i++) {
617 Y->lambdas[i] = cblas_dnrm2(Y->dims[n], Y->factors[n] + i, Y->rank);
618 cblas_dscal(Y->dims[n], 1.0 / Y->lambdas[i], Y->factors[n] + i, Y->rank);
632 for (i = 0; i < Y->nmodes; i++) {
633 for (j = 0; j < Y->rank;
636 Y->lambdas[j] = cblas_dnrm2(Y->dims[i], Y->factors[i] + j, Y->rank);
637 if (Y->lambdas[j] == 0.0) {
640 cblas_dscal(Y->dims[i], 1.0 / Y->lambdas[j], Y->factors[i] + j,
643 l = cblas_dnrm2(Y->dims[i], Y->factors[i] + j, Y->rank);
648 cblas_dscal(Y->dims[i], 1.0 / l, Y->factors[i] + j, Y->rank);
// Transposes matrix A (rowsA x colsA) into A_T (colsA x rowsA).
// NOTE(review): reads A with stride rowsA and writes contiguous runs of
// colsA elements into A_T — i.e. it gathers what is a "row" of a
// column-major A into a contiguous row of A_T. Assumes A is stored
// column-major (or equivalently, this converts between the two layouts);
// confirm against callers. The loop index `i` is declared on a line not
// visible in this view.
668 void TransposeM(
double *A,
double *A_T,
long int rowsA,
long int colsA) {
670 for (i = 0; i < rowsA; i++) {
// Copy colsA elements A[i], A[i+rowsA], A[i+2*rowsA], ... into
// A_T[colsA*i .. colsA*i + colsA - 1] (unit output stride).
678 cblas_dcopy(colsA, A + i, rowsA, A_T + colsA * i, 1);
690 double *KR, alpha, beta;
692 KR =
reinterpret_cast<double *
>(
693 malloc(
sizeof(
double) * (Y->dims_product / Y->dims[n]) * Y->rank));
699 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, Y->dims[n],
700 (Y->dims_product / Y->dims[n]), Y->rank, alpha, Y->factors[n],
701 Y->rank, KR, Y->rank, beta, T->data, Y->dims[n]);
726 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, Y->dims[n],
727 (Y->dims_product / Y->dims[n]), Y->rank, alpha, Y->factors[n],
728 Y->rank, KR, Y->rank, beta, X, Y->dims[n]);
730 return cblas_dnrm2(Y->dims_product, X, 1);
744 double *MTTKRP,
double *V,
double *S,
745 double tensor_norm) {
746 double dotXY, dotYY, *lambda_array, e;
747 lambda_array =
reinterpret_cast<double *
>(malloc(
sizeof(
double) * Y->rank));
749 dotXY = cblas_ddot(Y->dims[n] * Y->rank, Y->factors[n], 1, MTTKRP, 1);
753 cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, Y->rank, Y->dims[n], 1.0,
754 Y->factors[n], Y->rank, 0.0, S, Y->rank);
758 cblas_dsymv(CblasRowMajor, CblasUpper, Y->rank, 1.0, V, Y->rank, Y->lambdas,
759 1, 0.0, lambda_array, 1);
761 dotYY = cblas_ddot(Y->rank, lambda_array, 1, Y->lambdas, 1);
763 e = ((tensor_norm * tensor_norm) - (2 * dotXY) + dotYY);
776 double *KRP,
long int n) {
779 cblas_dcopy(T->dims_product, T->data, 1, X, 1);
780 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, Y->dims[n],
781 (Y->dims_product / Y->dims[n]), Y->rank, 1.0, Y->factors[n],
782 Y->rank, KRP, Y->rank, -1.0, X, Y->dims_product / Y->dims[n]);
783 e = cblas_dnrm2(Y->dims_product, X, 1);
798 printf(
"From dims_check():: length < 1, Exit(-1)");
802 printf(
"From dims_check():: dims == NULL, Exit(-2)");
805 for (i = 0; i < length; i++) {
807 printf(
"From dims_check():: dims[%ld] < 1, Exit(-3)", i);
825 nY->nmodes = Y->nmodes - 1;
827 reinterpret_cast<double **
>(malloc(
sizeof(
double *) * nY->nmodes));
828 nY->dims = (
long int *)malloc(
sizeof(
long int) * nY->nmodes);
829 nY->lambdas =
reinterpret_cast<double *
>(malloc(
sizeof(
double) * Y->rank));
831 nY->dims_product = Y->dims_product / Y->dims[n];
833 for (i = 0, j = 0; i < Y->nmodes; i++) {
835 nY->factors[j] = Y->factors[i];
836 nY->dims[j] = Y->dims[i];
851 for (i = Y->nmodes - 1; i >= 0; i--) {
854 if (p != 0) indeces[i] = j / p;
874 long int useful_num_threads =
877 if (Y->dims_product < num_threads) useful_num_threads = Y->dims_product;
879 #pragma omp parallel num_threads(useful_num_threads) 881 long int thread_id = omp_get_thread_num();
899 void Gen_Tensor(ktensor *Y, tensor *T,
long int R,
long int N,
long int *D,
913 double alpha, tensor_norm;
920 Y->dims = (
long int *)malloc(
sizeof(
long int) * Y->nmodes);
922 reinterpret_cast<double **
>(malloc(
sizeof(
double *) * Y->nmodes));
923 for (i = 0; i < Y->nmodes; i++) Y->dims[i] = D[i];
926 for (i = 0; i < Y->nmodes; i++) {
927 Y->factors[i] =
reinterpret_cast<double *
>(
928 malloc(
sizeof(
double) * Y->rank * Y->dims[i]));
929 Y->dims_product *= Y->dims[i];
930 for (j = 0; j < Y->rank * Y->dims[i]; j++) {
932 (
static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * 2 - 1;
935 Y->lambdas =
reinterpret_cast<double *
>(malloc(
sizeof(
double) * Y->rank));
936 memset(Y->lambdas, 0,
sizeof(
double) * Y->rank);
940 T->dims = (
long int *)malloc(
sizeof(
long int) * Y->nmodes);
942 reinterpret_cast<double *
>(malloc(
sizeof(
double) * Y->dims_product));
943 T->dims_product = Y->dims_product;
945 for (i = 0; i < T->nmodes; i++) T->dims[i] = D[i];
949 tensor_norm = cblas_dnrm2(Y->dims_product, T->data, 1);
952 alpha = (noise * tensor_norm) / sqrt(Y->dims_product);
955 double U1, U2, Z1, Z2;
956 for (i = 0; i < Y->dims_product - 1; i += 2) {
958 U1 = (
static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
959 U2 = (
static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
962 Z1 = sqrt(-2.0 * log(U1)) * cos(2 * M_PI * U2);
963 Z2 = sqrt(-2.0 * log(U1)) * sin(2 * M_PI * U2);
966 Z1 = (Z1 * 2.0) - 1.0;
967 Z2 = (Z2 * 2.0) - 1.0;
969 T->data[i] += alpha * Z1;
970 T->data[i + 1] += alpha * Z2;
972 if ((Y->dims_product % 2) == 0) {
975 U1 = (
static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
976 U2 = (
static_cast<double>(rand()) / static_cast<double>(RAND_MAX));
979 Z1 = sqrt(-2.0 * log(U1)) * cos(2 * M_PI * U2);
982 Z1 = (Z1 * 2.0) - 1.0;
984 T->data[Y->dims_product - 1] += alpha * Z1;
995 if (Y->nmodes == 2) {
1002 long int i, j, *indexers, nmodes;
1003 double *partial_Hadamards;
1007 indexers = (
long int *)malloc(
sizeof(
long int) *
1009 partial_Hadamards =
reinterpret_cast<double *
>(
1010 malloc(
sizeof(
double) * Y->rank *
1014 memset(indexers, 0,
sizeof(
long int) * (nmodes));
1022 vdMul(Y->rank, &Y->factors[nmodes - 2][Y->rank * indexers[nmodes - 2]],
1023 &Y->factors[nmodes - 1][Y->rank * indexers[nmodes - 1]],
1024 &partial_Hadamards[Y->rank * (nmodes - 3)]);
1028 for (i = nmodes - 4; i >= 0; i--)
1029 vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1030 &partial_Hadamards[Y->rank * (i + 1)],
1031 &partial_Hadamards[Y->rank * i]);
1034 for (i = start; i < end; i++) {
1042 vdMul(Y->rank, &Y->factors[0][Y->rank * indexers[0]],
1043 &partial_Hadamards[0], &C[Y->rank * (i - start)]);
1046 for (j = 0; j < nmodes; j++) {
1048 if (indexers[j] >= Y->dims[j]) {
1058 free(partial_Hadamards);
1067 long int i, j, *indexers, c;
1069 indexers = (
long int *)malloc(
sizeof(
long int) * Y->nmodes);
1070 memset(indexers, 0,
sizeof(
long int) * (Y->nmodes));
1077 for (; (c < end) && (i < Y->dims[1]); i++) {
1078 if (c != start) j = 0;
1079 for (; (c < end) && (j < Y->dims[0]); j++) {
1089 vdMul(Y->rank, &Y->factors[1][i * Y->rank], &Y->factors[0][j * Y->rank],
1090 &C[(c - start) * Y->rank]);
1098 double *partial_Hadamards) {
1099 long int update_from, i;
1100 long int nmodes = Y->nmodes;
1102 for (i = 0; i < nmodes; i++) {
1104 if (indexers[i] != 0)
break;
1108 if (update_from == 0) {
1110 }
else if (update_from >= nmodes - 2) {
1121 vdMul(Y->rank, &Y->factors[nmodes - 2][Y->rank * indexers[nmodes - 2]],
1122 &Y->factors[nmodes - 1][Y->rank * indexers[nmodes - 1]],
1123 &partial_Hadamards[Y->rank * (nmodes - 3)]);
1125 for (i = nmodes - 4; i >= 0; i--) {
1135 vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1136 &partial_Hadamards[Y->rank * (i + 1)],
1137 &partial_Hadamards[Y->rank * i]);
1141 for (i = update_from - 1; i >= 0; i--) {
1146 vdMul(Y->rank, &Y->factors[i + 1][Y->rank * indexers[i + 1]],
1147 &partial_Hadamards[Y->rank * (i + 1)],
1148 &partial_Hadamards[Y->rank * i]);
// compute_Iteration_Space (header truncated in this view; full signature per
// the file index: (long int thread_id, long int num_threads,
// long int iter_space, long int *start, long int *end)).
// Splits iter_space iterations as evenly as possible across num_threads,
// writing this thread's half-open range [*start, *end).
1163 long int iter_space,
long int *start,
// b = base chunk size; the first `limit` threads each take one extra item
// so that limit*(b+1) + (num_threads-limit)*b == iter_space.
1171 limit = iter_space % num_threads;
1172 b = iter_space / num_threads;
1174 if (thread_id < limit) {
// Threads 0..limit-1 own oversized chunks of b+1 items.
1175 *start = thread_id * (b + 1);
1176 *end = *start + (b + 1);
// Remaining threads start after all oversized chunks; each owns b items
// (the matching `*end = *start + b;` line is not visible in this view).
1178 *start = (limit * (b + 1)) + ((thread_id - limit) * b);
1190 for (i = 0; i < Y->nmodes; i++) {
1191 free(Y->factors[i]);
1214 inputs->rank = atoi(argv[j++]);
1216 inputs->num_threads = atoi(argv[j++]);
1217 inputs->max_iters = atoi(argv[j++]);
1218 inputs->tolerance = atof(argv[j++]);
1219 inputs->nmodes = atoi(argv[j++]);
1223 printf(
"N=%ld\n", inputs->nmodes);
1224 inputs->dims = (
long int *)malloc(
sizeof(
long int) * inputs->nmodes);
1225 for (i = 0; i < inputs->nmodes; i++) inputs->dims[i] = atoi(argv[offset + i]);
1227 if (inputs->num_threads < 1) {
1229 "From process_inputs(), num_threads = %ld, num_threads must be at least " 1231 inputs->num_threads);
1234 if (inputs->rank < 1) {
1235 printf(
"From process_inputs(), rank = %ld, rank must be at least 1\nExit\n",
1241 printf(
"****************************************\n");
1242 printf(
"Processed inputs:\n");
1243 printf(
"inputs->rank = %ld\n", inputs->rank);
1244 printf(
"inputs->num_threads = %ld\n", inputs->num_threads);
1245 printf(
"inputs->max_iters = %ld\n", inputs->max_iters);
1246 printf(
"inputs->tolerance = %.20lf\n", inputs->tolerance);
1247 printf(
"inputs->nmodes = %ld\n", inputs->nmodes);
1248 printf(
"inputs->dims[%ld", inputs->dims[0]);
1249 for (i = 1; i < inputs->nmodes; i++) {
1250 printf(
",%ld", inputs->dims[i]);
1253 printf(
"****************************************\n");
1268 if (D == ::direction::left) {
1272 nY->nmodes = Y->nmodes - (n + 1);
1277 reinterpret_cast<double **
>(malloc(
sizeof(
double *) * nY->nmodes));
1278 nY->dims = (
long int *)malloc(
sizeof(
long int) * nY->nmodes);
1280 nY->lambdas =
reinterpret_cast<double *
>(malloc(
sizeof(
double) * nY->rank));
1282 for (i = 0; i < nY->nmodes; i++) {
1283 nY->factors[i] = Y->factors[i + jump];
1284 nY->dims[i] = Y->dims[i + jump];
1286 for (i = 0; i < nY->rank; i++) nY->lambdas[i] = Y->lambdas[i];
1287 for (i = 0, nY->dims_product = 1; i < nY->nmodes; i++)
1288 nY->dims_product *= nY->dims[i];
1301 if (D == ::direction::left) {
1305 nY->nmodes = Y->nmodes - (n + 1);
1310 nY->dims_product = 1;
1311 for (i = 0; i < nY->nmodes; i++) {
1312 nY->factors[i] = Y->factors[i + jump];
1313 nY->dims[i] = Y->dims[i + jump];
1314 nY->dims_product *= nY->dims[i];
1316 for (i = 0; i < nY->rank; i++) nY->lambdas[i] = Y->lambdas[i];
1326 if (n < 0 || n >= Y->nmodes) {
1327 printf(
"In remove_mode_Ktensor() invalid value of n == %ld\nExit\n", n);
1332 for (i = 0, j = 0; i < Y->nmodes + 1; j++) {
1336 Y->dims[i] = Y->dims[j];
1337 Y->factors[i] = Y->factors[j];
1351 if ((clear_factors != 0) && (clear_factors != 1)) {
1353 "clear_factors argument in destruct_Ktensor() was not 0 or 1. Return. " 1359 if (clear_factors == 1) {
1360 for (i = 0; i < Y->nmodes; i++) free(Y->factors[i]);
1362 for (i = 0; i < Y->nmodes; i++) Y->factors[i] = NULL;
1381 t1->data = t2->data;
1391 if (D == ::direction::left) {
1396 nT->nmodes = T->nmodes - jump;
1402 nT->dims_product = 1;
1403 for (i = 0; i < nT->nmodes; i++) {
1404 nT->dims[i] = T->dims[i + jump];
1405 nT->dims_product *= nT->dims[i];
1416 nY->nmodes = Y->nmodes;
1419 nY->dims = (
long int *)malloc(
sizeof(
long int) * nY->nmodes);
1421 reinterpret_cast<double **
>(malloc(
sizeof(
double *) * nY->nmodes));
1422 nY->lambdas =
reinterpret_cast<double *
>(malloc(
sizeof(
double) * nY->rank));
1424 nY->dims_product = 1;
1425 for (i = 0; i < nY->nmodes; i++) {
1426 nY->dims[i] = Y->dims[i];
1427 nY->factors[i] = Y->factors[i];
1428 nY->dims_product *= nY->dims[i];
1431 cblas_dcopy(nY->rank, Y->lambdas, 1, nY->lambdas, 1);
1435 if (D != ::direction::left && D != ::direction::right) {
1436 printf(
"In opposite_direction input D was invalid!\nExit\n");
1440 if (D == ::direction::left) {
1441 return ::direction::right;
1443 return ::direction::left;
1447 #endif // DIMTREE_DDTTENSOR_HPP_ void reorder_Ktensor(ktensor *Y, ktensor *nY, long int n)
void normalize_Factor_Matrix_RowMajor(ktensor *Y, long int n)
void ktensor_copy_constructor(ktensor *Y, ktensor *nY)
long int CompareM(double *A, double *B, long int nRows, long int nCols, double eps)
void Upper_Hadamard_RowMajor(long int nRows, long int nCols, double *A, double *B, double *C)
void process_inputs(long int argc, char *argv[], tensor_inputs *inputs)
void printM_RowMajor(double *M, long int num_cols, long int num_rows)
void dims_check(long int *dims, long int length)
void tensor_data_swap(tensor *t1, tensor *t2)
void LR_tensor_Reduction(tensor *T, tensor *nT, long int n, direction D)
void Full_nMode_Matricization_RowMajor(tensor *T, ktensor *Y, long int n)
double CP_ALS_efficient_error_computation(ktensor *Y, long int n, double *MTTKRP, double *V, double *S, double tensor_norm)
CP_ALS_efficient_error_computation(); needs to compute dotXX - 2*dotXY + dotYY 1) Y...
void Gen_Tensor(ktensor *Y, tensor *T, long int R, long int N, long int *D, double noise)
double CP_ALS_naive_error_computation(tensor *T, ktensor *Y, double *X, double *KRP, long int n)
Computes the error between T and Y.
void destruct_Ktensor(ktensor *Y, long int clear_factors)
void update_Partial_Hadamards(ktensor *Y, long int *indexers, double *partial_Hadamards, double **reordered_Factors)
void print_tensor(tensor *T, long int show_data)
void KR_RowMajor(ktensor *Y, double *C, long int n)
void clean_Up_Gen_Tensor(ktensor *Y, tensor *T)
void printM_ColMajor(double *M, long int num_cols, long int num_rows)
void LR_Ktensor_Reordering_existingY(ktensor *Y, ktensor *nY, long int n, direction D)
void destroy_inputs(tensor_inputs *inputs)
void Multi_KR_RowMajor(ktensor *Y, double *C, long int n)
double approximation_Error(double *X, double *KR, ktensor *Y, long int n)
direction opposite_direction(direction D)
void vdMul(long int n, double *a, double *b, double *c)
void print_dgemv_inputs(CBLAS_ORDER dgemv_layout, CBLAS_TRANSPOSE transA, long int m, long int n, double alpha, long int T_offset, long int tensor_stride, long int A_offset, long int A_stride, double beta, long int output_col_stride, long int output_stride)
void do_SYRKs_RowMajor(ktensor *Y, double **SYRKs)
void reorder_Factors(ktensor *Y, double **reordered_Factors, long int *reordered_Dims, long int n)
void destruct_Tensor(tensor *T)
void normalize_Ktensor_RowMajor(ktensor *Y)
void parallel_update_Partial_Hadamards(ktensor *Y, long int *indexers, double *partial_Hadamards)
void TransposeM(double *A, double *A_T, long int rowsA, long int colsA)
void print_Ktensor_RowMajor(ktensor *Y)
void compute_KRP_Indices(long int j, ktensor *Y, long int *indeces)
void wrapper_Parallel_Multi_revKRP(ktensor *Y, long int num_threads, double *KRP)
Wrapper function for the reverse KRP of a ktensor Y 1) Ktensor object 2) number of threads to use 3)...
void LR_Ktensor_Reordering_newY(ktensor *Y, ktensor *nY, long int n, direction D)
void MHada_RowMajor(ktensor *Y, double **SYRKs, double *V, long int n)
void remove_mode_Ktensor(ktensor *Y, long int n)
void print_dgemm_inputs(CBLAS_ORDER dgemm_layout, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, long int m, long int n, long int k, double alpha, long int strideA, long int strideB, double beta, long int strideC)
void MTTKRP_RowMajor(tensor *T, double *K, double *C, long int rank, long int n)
void parallel_Multi_KR_RowMajor(ktensor *Y, double *C, long int start, long int end)
void compute_Iteration_Space(long int thread_id, long int num_threads, long int iter_space, long int *start, long int *end)
void parallel_KR_RowMajor(ktensor *Y, double *C, long int start, long int end)
parallel_KR_RowMajor(); Does the parallel KRP of the factor matrices in the ktensor Y ...