planc
Parallel Lowrank Approximation with Non-negativity Constraints
npyio.hpp
Go to the documentation of this file.
1 /* Copyright 2018 Ramakrishnan Kannan */
2 #ifndef COMMON_NPYIO_HPP_
3 #define COMMON_NPYIO_HPP_
4 #include <armadillo>
5 #include <cassert>
6 #include <cstdio>
7 #include <string>
8 #include <vector>
9 #include "common/tensor.hpp"
10 #include "common/utils.h"
11 
12 namespace planc {
13 class NumPyArray {
14  private:
15  int64_t m_word_size;
16  bool m_fortran_order;
17  int64_t m_modes;
18  UVEC m_dims;
19  void parse_npy_header(FILE* fp) {
20  char buffer[256];
21  int64_t res = fread(buffer, sizeof(char), 11, fp);
22  if (res != 11) {
23  ERR << "Something wrong. Could not read header " << std::endl;
24  exit(-1);
25  }
26  buffer[11] = 0;
27  std::cout << "first 11 characters::" << buffer << std::endl;
28 
29  std::string header = fgets(buffer, 256, fp);
30  assert(header[header.size() - 1] == '\n');
31 
32  // fortran order is column major order
33  // C order is row major order
34  int64_t loc1 = header.find("fortran_order");
35  loc1 += 16;
36  this->m_fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
37 
38  // obtain dimensions
39  loc1 = header.find("(");
40  int64_t loc2 = header.find(")");
41  if (loc1 < 0 || loc2 < 0) {
42  ERR << "could not find ()" << std::endl;
43  exit(-1);
44  }
45 
46  std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
47  if (str_shape[str_shape.size() - 1] == ',') {
48  this->m_modes = 1;
49  } else {
50  this->m_modes = std::count(str_shape.begin(), str_shape.end(), ',') + 1;
51  }
52  this->m_dims = arma::zeros<UVEC>(m_modes);
53 
54  std::stringstream ss(str_shape);
55  std::string s;
56  ss.str(str_shape);
57  int64_t i = 0;
58  while (getline(ss, s, ',')) {
59  this->m_dims[i++] = ::atoi(s.c_str());
60  }
61  // endian, word size, data type
62  // byte order code | stands for not applicable.
63  // not sure when this applies except for byte array
64  loc1 = header.find("descr");
65  loc1 += 9;
66  bool littleEndian =
67  (header[loc1] == '<' || header[loc1] == '|' ? true : false);
68  assert(littleEndian);
69 
70  std::string word_size = header.substr(loc1 + 2);
71  loc2 = word_size.find("'");
72  this->m_word_size = atoi(word_size.substr(0, loc2).c_str());
73  }
74 
75  public:
78  this->m_word_size = 0;
79  this->m_fortran_order = false;
80  this->m_modes = 0;
81  }
82  void load(std::string fname) {
83  FILE* fp = fopen(fname.c_str(), "rb");
84  if (fp == NULL) {
85  ERR << "Could not load the file " << fname << std::endl;
86  exit(-1);
87  }
88  parse_npy_header(fp);
89  this->m_input_tensor = new Tensor(this->m_dims);
90  int64_t nread = fread(&m_input_tensor->m_data[0],
91  sizeof(std::vector<double>::value_type),
92  m_input_tensor->numel(), fp);
93  if (nread != m_input_tensor->numel()) {
94  WARN << "something wrong ::read::" << nread
95  << "::numel::" << this->m_input_tensor->numel()
96  << "::word_size::" << this->m_word_size << std::endl;
97  }
98  }
99  void printInfo() {
100  INFO << "modes::" << this->m_modes << "::dims::" << std::endl
101  << this->m_dims << "::fortran_order::" << this->m_fortran_order
102  << "::word_size::" << this->m_word_size << std::endl;
103  }
104 };
105 } // namespace planc
106 
107 #endif // COMMON_NPYIO_HPP_
Data is stored such that the unfolding is column major.
Definition: tensor.hpp:32
std::vector< double > m_data
Definition: tensor.hpp:73
#define UVEC
Definition: utils.h:58
Tensor * m_input_tensor
Definition: npyio.hpp:76
void load(std::string fname)
Definition: npyio.hpp:82
#define ERR
Definition: utils.h:28
#define INFO
Definition: utils.h:36
void printInfo()
Definition: npyio.hpp:99
UWORD numel() const
Returns total number of elements.
Definition: tensor.hpp:172
ncp_factors contains the factors of the NCP. Every i-th factor is of size n_i * k; the number of factors is ...
Definition: ncpfactors.hpp:20
#define WARN
Definition: utils.h:32