Loading...
Searching...
No Matches
cnpy.H
Go to the documentation of this file.
1//Copyright (C) 2011 Carl Rogers
2//Copyright (C) 2017 Matheus Gadelha
3//Copyright (C) 2019 Giovanni Stabile
4//Released under MIT License
5//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
6
7#ifndef LIBCNPY_H_
8#define LIBCNPY_H_
9
10#include <Eigen/Eigen>
11#include <unsupported/Eigen/CXX11/Tensor>
12#include<string>
13#include<stdexcept>
14#include<sstream>
15#include<vector>
16#include<cstdio>
17#include<typeinfo>
18#include<iostream>
19#include<cassert>
20#include<zlib.h>
21#include<map>
22#include<memory>
23#include<stdint.h>
24#include<numeric>
25#include <regex>
26
27
28namespace cnpy
29{
30
32{
33 NpyArray(const std::vector<size_t>& _shape, size_t _word_size,
34 bool _fortran_order, std::string _number_type) :
35 shape(_shape), word_size(_word_size), fortran_order(_fortran_order),
36 number_type(_number_type)
37 {
38 num_vals = 1;
39
40 for (size_t i = 0; i < shape.size(); i++)
41 {
42 num_vals *= shape[i];
43 }
44
45 data_holder = std::shared_ptr<std::vector<char>>(
46 new std::vector<char>(num_vals * word_size));
47 }
48
50
51 template<typename T>
53 {
54 return reinterpret_cast<T*>(&(*data_holder)[0]);
55 }
56
57 template<typename T>
58 const T* data() const
59 {
60 return reinterpret_cast<T*>(&(*data_holder)[0]);
61 }
62
63 template<typename T>
64 std::vector<T> as_vec()
65 {
66 if (number_type == "f4")
67 {
68 float* p = data<float>();
69 return std::vector<T>(static_cast<float*>(p),
70 static_cast<float*>(p) + num_vals);
71 }
72
73 if (number_type == "f8")
74 {
75 double* p = data<double>();
76 return std::vector<T>(static_cast<double*>(p),
77 static_cast<double*>(p) + num_vals);
78 }
79
80 if (number_type == "f16")
81 {
82 long double* p = data<long double>();
83 return std::vector<T>(static_cast<long double*>(p),
84 static_cast<long double*>(p) + num_vals);
85 }
86
87 if (number_type == "i2")
88 {
89 short* p = data<short>();
90 return std::vector<T>(static_cast<short*>(p),
91 static_cast<short*>(p) + num_vals);
92 }
93
94 if (number_type == "i4")
95 {
96 int* p = data<int>();
97 return std::vector<T>(static_cast<int*>(p), static_cast<int*>(p) + num_vals);
98 }
99
100 if (number_type == "i8")
101 {
102 long* p = data<long>();
103 return std::vector<T>(static_cast<long*>(p), static_cast<long*>(p) + num_vals);
104 }
105
106 // if (number_type == "c8")
107 // {
108 // _Complex float* p = data<_Complex float>();
109 // return std::vector<T>(static_cast<_Complex float*>(p), static_cast<_Complex float*>(p) + num_vals);
110 // }
111 // if (number_type == "c16")
112 // {
113 // std::complex<double>* p = data<std::complex<double>>();
114 // return std::vector<T>(static_cast<std::complex<double>*>(p), static_cast<std::complex<double>*>(p) + num_vals);
115 // }
116 // if (number_type == "c32")
117 // {
118 // std::complex<long double>* p = data<std::complex<long double>>();
119 // return std::vector<T>(static_cast<std::complex<long double>*>(p), static_cast<std::complex<long double>*>(p) + num_vals);
120 // }
121 if (number_type == "u2")
122 {
123 unsigned short* p = data<unsigned short>();
124 return std::vector<T>(static_cast<unsigned short*>(p),
125 static_cast<unsigned short*>(p) + num_vals);
126 }
127
128 if (number_type == "u4")
129 {
130 unsigned int* p = data<unsigned int>();
131 return std::vector<T>(static_cast<unsigned int*>(p),
132 static_cast<unsigned int*>(p) + num_vals);
133 }
134
135 if (number_type == "u8")
136 {
137 unsigned long* p = data<unsigned long>();
138 return std::vector<T>(static_cast<unsigned long*>(p),
139 static_cast<unsigned long*>(p) + num_vals);
140 }
141
142 if (number_type == "f8")
143 {
144 double* p = data<double>();
145 return std::vector<T>(static_cast<double*>(p),
146 static_cast<double*>(p) + num_vals);
147 }
148 }
149
150 size_t num_bytes() const
151 {
152 return data_holder->size();
153 }
154
155 std::shared_ptr<std::vector<char>> data_holder;
156 std::vector<size_t> shape;
157 size_t word_size;
159 std::string number_type;
160 size_t num_vals;
161};
162
163using npz_t = std::map<std::string, NpyArray>;
164
165char BigEndianTest();
166char map_type(const std::type_info& t);
167template<typename T> std::vector<char> create_npy_header(
168 const std::vector<size_t>& shape);
169void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape,
170 bool& fortran_order, std::string& number_type);
171void parse_npy_header(unsigned char* buffer, size_t& word_size,
172 std::vector<size_t>& shape, bool& fortran_order, std::string& number_type);
173void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
174 size_t& global_header_offset);
175npz_t npz_load(std::string fname);
176NpyArray npz_load(std::string fname, std::string varname);
177NpyArray npy_load(std::string fname);
178
179template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs,
180 const T rhs)
181{
182 //write in little endian
183 for (size_t byte = 0; byte < sizeof(T); byte++)
184 {
185 char val = *((char*)&rhs + byte);
186 lhs.push_back(val);
187 }
188
189 return lhs;
190}
191
192template<> std::vector<char>& operator+=(std::vector<char>& lhs,
193 const std::string rhs);
194template<> std::vector<char>& operator+=(std::vector<char>& lhs,
195 const char* rhs);
196
197
198template<typename T> void npy_save(std::string fname, const T* data,
199 const std::vector<size_t> shape, std::string mode = "w")
200{
201 FILE* fp = NULL;
202 std::vector<size_t>
203 true_data_shape; //if appending, the shape of existing + new data
204
205 if (mode == "a")
206 {
207 fp = fopen(fname.c_str(), "r+b");
208 }
209
210 if (fp)
211 {
212 //file exists. we need to append to it. read the header, modify the array size
213 size_t word_size;
214 bool fortran_order;
215 std::string number_type;
216 parse_npy_header(fp, word_size, true_data_shape, fortran_order, number_type);
217 assert(!fortran_order);
218
219 if (word_size != sizeof(T))
220 {
221 std::cout << "libnpy error: " << fname << " has word size " << word_size <<
222 " but npy_save appending data sized " << sizeof(T) << "\n";
223 assert( word_size == sizeof(T) );
224 }
225
226 if (true_data_shape.size() != shape.size())
227 {
228 std::cout <<
229 "libnpy error: npy_save attempting to append misdimensioned data to " << fname
230 << "\n";
231 assert(true_data_shape.size() != shape.size());
232 }
233
234 for (size_t i = 1; i < shape.size(); i++)
235 {
236 if (shape[i] != true_data_shape[i])
237 {
238 std::cout << "libnpy error: npy_save attempting to append misshaped data to " <<
239 fname << "\n";
240 assert(shape[i] == true_data_shape[i]);
241 }
242 }
243
244 true_data_shape[0] += shape[0];
245 }
246 else
247 {
248 fp = fopen(fname.c_str(), "wb");
249 true_data_shape = shape;
250 }
251
252 std::vector<char> header = create_npy_header<T>(true_data_shape);
253 size_t nels = std::accumulate(shape.begin(), shape.end(), 1,
254 std::multiplies<size_t>());
255 fseek(fp, 0, SEEK_SET);
256 fwrite(&header[0], sizeof(char), header.size(), fp);
257 fseek(fp, 0, SEEK_END);
258 fwrite(data, sizeof(T), nels, fp);
259 fclose(fp);
260}
261
262template<typename T> void npz_save(std::string zipname, std::string fname,
263 const T* data, const std::vector<size_t>& shape, std::string mode = "w")
264{
265 //first, append a .npy to the fname
266 fname += ".npy";
267 //now, on with the show
268 FILE* fp = NULL;
269 uint16_t nrecs = 0;
270 size_t global_header_offset = 0;
271 std::vector<char> global_header;
272
273 if (mode == "a")
274 {
275 fp = fopen(zipname.c_str(), "r+b");
276 }
277
278 if (fp)
279 {
280 //zip file exists. we need to add a new npy file to it.
281 //first read the footer. this gives us the offset and size of the global header
282 //then read and store the global header.
283 //below, we will write the the new data at the start of the global header then append the global header and footer below it
284 size_t global_header_size;
285 parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
286 fseek(fp, global_header_offset, SEEK_SET);
287 global_header.resize(global_header_size);
288 size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
289
290 if (res != global_header_size)
291 {
292 throw std::runtime_error("npz_save: header read error while adding to existing zip");
293 }
294
295 fseek(fp, global_header_offset, SEEK_SET);
296 }
297 else
298 {
299 fp = fopen(zipname.c_str(), "wb");
300 }
301
302 std::vector<char> npy_header = create_npy_header<T>(shape);
303 size_t nels = std::accumulate(shape.begin(), shape.end(), 1,
304 std::multiplies<size_t>());
305 size_t nbytes = nels * sizeof(T) + npy_header.size();
306 //get the CRC of the data to be added
307 uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
308 crc = crc32(crc, (uint8_t*)data, nels * sizeof(T));
309 //build the local header
310 std::vector<char> local_header;
311 local_header += "PK"; //first part of sig
312 local_header += (uint16_t) 0x0403; //second part of sig
313 local_header += (uint16_t) 20; //min version to extract
314 local_header += (uint16_t) 0; //general purpose bit flag
315 local_header += (uint16_t) 0; //compression method
316 local_header += (uint16_t) 0; //file last mod time
317 local_header += (uint16_t) 0; //file last mod date
318 local_header += (uint32_t) crc; //crc
319 local_header += (uint32_t) nbytes; //compressed size
320 local_header += (uint32_t) nbytes; //uncompressed size
321 local_header += (uint16_t) fname.size(); //fname length
322 local_header += (uint16_t) 0; //extra field length
323 local_header += fname;
324 //build global header
325 global_header += "PK"; //first part of sig
326 global_header += (uint16_t) 0x0201; //second part of sig
327 global_header += (uint16_t) 20; //version made by
328 global_header.insert(global_header.end(), local_header.begin() + 4,
329 local_header.begin() + 30);
330 global_header += (uint16_t) 0; //file comment length
331 global_header += (uint16_t) 0; //disk number where file starts
332 global_header += (uint16_t) 0; //internal file attributes
333 global_header += (uint32_t) 0; //external file attributes
334 global_header += (uint32_t)
335 global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
336 global_header += fname;
337 //build footer
338 std::vector<char> footer;
339 footer += "PK"; //first part of sig
340 footer += (uint16_t) 0x0605; //second part of sig
341 footer += (uint16_t) 0; //number of this disk
342 footer += (uint16_t) 0; //disk where footer starts
343 footer += (uint16_t) (nrecs + 1); //number of records on this disk
344 footer += (uint16_t) (nrecs + 1); //total number of records
345 footer += (uint32_t) global_header.size(); //nbytes of global headers
346 footer += (uint32_t) (global_header_offset + nbytes +
347 local_header.size()); //offset of start of global headers, since global header now starts after newly written array
348 footer += (uint16_t) 0; //zip file comment length
349 //write everything
350 fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
351 fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
352 fwrite(data, sizeof(T), nels, fp);
353 fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
354 fwrite(&footer[0], sizeof(char), footer.size(), fp);
355 fclose(fp);
356}
357
358template<typename T> void npy_save(std::string fname, const std::vector<T> data,
359 std::string mode = "w")
360{
361 std::vector<size_t> shape;
362 shape.push_back(data.size());
363 npy_save(fname, &data[0], shape, mode);
364}
365
366template<typename T> void npz_save(std::string zipname, std::string fname,
367 const std::vector<T> data, std::string mode = "w")
368{
369 std::vector<size_t> shape;
370 shape.push_back(data.size());
371 npz_save(zipname, fname, &data[0], shape, mode);
372}
373
374template<typename T> std::vector<char> create_npy_header(
375 const std::vector<size_t>& shape)
376{
377 std::vector<char> dict;
378 dict += "{'descr': '";
379 dict += BigEndianTest();
380 dict += map_type(typeid(T));
381 dict += std::to_string(sizeof(T));
382 dict += "', 'fortran_order': False, 'shape': (";
383 dict += std::to_string(shape[0]);
384
385 for (size_t i = 1; i < shape.size(); i++)
386 {
387 dict += ", ";
388 dict += std::to_string(shape[i]);
389 }
390
391 if (shape.size() == 1)
392 {
393 dict += ",";
394 }
395
396 dict += "), }";
397 //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
398 int remainder = 16 - (10 + dict.size()) % 16;
399 dict.insert(dict.end(), remainder, ' ');
400 dict.back() = '\n';
401 std::vector<char> header;
402 header += (char) 0x93;
403 header += "NUMPY";
404 header += (char) 0x01; //major version of numpy format
405 header += (char) 0x00; //minor version of numpy format
406 header += (uint16_t) dict.size();
407 header.insert(header.end(), dict.begin(), dict.end());
408 return header;
409}
410
411template<typename T, int dim>
412Eigen::Matrix < T, -1, dim > load(Eigen::Matrix < T, -1, dim > & mat,
413 const std::string fname, std::string order = "rowMajor");
414
415template<typename T>
416Eigen::SparseMatrix<T> load(Eigen::SparseMatrix<T>& mat,
417 const std::string fname);
418
419template<typename T>
420Eigen::Tensor<T, 3> load(Eigen::Tensor<T, 3>& tens,
421 const std::string fname, std::string order = "rowMajor");
422
423template<typename T, int dim>
424void save(const Eigen::Matrix < T, -1, dim > & mat, const std::string fname);
425
426template<typename T>
427void save(const Eigen::Tensor<T, 3>& tensor, const std::string fname);
428
429template<typename T>
430void save(const Eigen::SparseMatrix<T>& mat, const std::string fname);
431
433cnpy::NpyArray load_the_npy_file(std::string fname);
434
435}
436
437#endif
volScalarField & T
Definition createT.H:46
Definition cnpy.H:29
void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
Definition cnpy.C:240
void save(const Eigen::Matrix< T, -1, dim > &mat, const std::string fname)
std::map< std::string, NpyArray > npz_t
Definition cnpy.H:163
cnpy::NpyArray load_the_npy_file(FILE *fp)
Definition cnpy.C:266
char map_type(const std::type_info &t)
Definition cnpy.C:21
Eigen::Matrix< T, -1, dim > load(Eigen::Matrix< T, -1, dim > &mat, const std::string fname, std::string order="rowMajor")
std::vector< char > create_npy_header(const std::vector< size_t > &shape)
Definition cnpy.H:374
char BigEndianTest()
Definition cnpy.C:15
npz_t npz_load(std::string fname)
Definition cnpy.C:329
void npy_save(std::string fname, const T *data, const std::vector< size_t > shape, std::string mode="w")
Definition cnpy.H:198
NpyArray npy_load(std::string fname)
Definition cnpy.C:464
void npz_save(std::string zipname, std::string fname, const T *data, const std::vector< size_t > &shape, std::string mode="w")
Definition cnpy.H:262
void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order, std::string &number_type)
Definition cnpy.C:175
std::vector< char > & operator+=(std::vector< char > &lhs, const T rhs)
Definition cnpy.H:179
volScalarField & p
label i
Definition pEqn.H:46
bool fortran_order
Definition cnpy.H:158
std::shared_ptr< std::vector< char > > data_holder
Definition cnpy.H:155
std::vector< T > as_vec()
Definition cnpy.H:64
const T * data() const
Definition cnpy.H:58
T * data()
Definition cnpy.H:52
std::vector< size_t > shape
Definition cnpy.H:156
std::string number_type
Definition cnpy.H:159
size_t num_vals
Definition cnpy.H:160
size_t word_size
Definition cnpy.H:157
NpyArray(const std::vector< size_t > &_shape, size_t _word_size, bool _fortran_order, std::string _number_type)
Definition cnpy.H:33
size_t num_bytes() const
Definition cnpy.H:150