Loading...
Searching...
No Matches
cnpy.H
1//Copyright (C) 2011 Carl Rogers
2//Copyright (C) 2017 Matheus Gadelha
3//Copyright (C) 2019 Giovanni Stabile
4//Released under MIT License
5//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
6
7#ifndef LIBCNPY_H_
8#define LIBCNPY_H_
9
10#include <Eigen/Eigen>
11#include <unsupported/Eigen/CXX11/Tensor>
12#include<string>
13#include<stdexcept>
14#include<sstream>
15#include<vector>
16#include<cstdio>
17#include<typeinfo>
18#include<iostream>
19#include<cassert>
20#include<zlib.h>
21#include<map>
22#include<memory>
23#include<stdint.h>
24#include<numeric>
25#include <regex>
26#include<cstring>
27#include "IFstream.H"
28
29
30namespace cnpy
31{
32
33struct NpyArray
34{
35 NpyArray(const std::vector<size_t>& _shape, size_t _word_size,
36 bool _fortran_order, std::string _number_type) :
37 shape(_shape), word_size(_word_size), fortran_order(_fortran_order),
38 number_type(_number_type)
39 {
40 num_vals = 1;
41
42 for (size_t i = 0; i < shape.size(); i++)
43 {
44 num_vals *= shape[i];
45 }
46
47 data_holder = std::shared_ptr<std::vector<char >> (
48 new std::vector<char>(num_vals * word_size));
49 }
50
51 NpyArray() : shape(0), word_size(0), fortran_order(0), num_vals(0) { }
52
53 template<typename T>
54 T* data()
55 {
56 return reinterpret_cast<T*>(& (* data_holder)[0]);
57 }
58
59 template<typename T>
60 const T* data() const
61 {
62 return reinterpret_cast<T*>(& (* data_holder)[0]);
63 }
64
65 template<typename T>
66 std::vector<T> as_vec()
67 {
68 if (number_type == "f4")
69 {
70 float* p = data<float>();
71 return std::vector<T>(static_cast<float*>(p),
72 static_cast<float*>(p) + num_vals);
73 }
74
75 if (number_type == "f8")
76 {
77 double* p = data<double>();
78 return std::vector<T>(static_cast<double*>(p),
79 static_cast<double*>(p) + num_vals);
80 }
81
82 if (number_type == "f16")
83 {
84 long double* p = data<long double>();
85 return std::vector<T>(static_cast<long double*>(p),
86 static_cast<long double*>(p) + num_vals);
87 }
88
89 if (number_type == "i2")
90 {
91 short* p = data<short>();
92 return std::vector<T>(static_cast<short*>(p),
93 static_cast<short*>(p) + num_vals);
94 }
95
96 if (number_type == "i4")
97 {
98 int* p = data<int>();
99 return std::vector<T>(static_cast<int*>(p), static_cast<int*>(p) + num_vals);
100 }
101
102 if (number_type == "i8")
103 {
104 long* p = data<long>();
105 return std::vector<T>(static_cast<long*>(p), static_cast<long*>(p) + num_vals);
106 }
107
108 // if (number_type == "c8")
109 // {
110 // _Complex float* p = data<_Complex float>();
111 // return std::vector<T>(static_cast<_Complex float*>(p), static_cast<_Complex float*>(p) + num_vals);
112 // }
113 // if (number_type == "c16")
114 // {
115 // std::complex<double>* p = data<std::complex<double>>();
116 // return std::vector<T>(static_cast<std::complex<double>*>(p), static_cast<std::complex<double>*>(p) + num_vals);
117 // }
118 // if (number_type == "c32")
119 // {
120 // std::complex<long double>* p = data<std::complex<long double>>();
121 // return std::vector<T>(static_cast<std::complex<long double>*>(p), static_cast<std::complex<long double>*>(p) + num_vals);
122 // }
123 if (number_type == "u2")
124 {
125 unsigned short* p = data<unsigned short>();
126 return std::vector<T>(static_cast<unsigned short*>(p),
127 static_cast<unsigned short*>(p) + num_vals);
128 }
129
130 if (number_type == "u4")
131 {
132 unsigned int* p = data<unsigned int>();
133 return std::vector<T>(static_cast<unsigned int*>(p),
134 static_cast<unsigned int*>(p) + num_vals);
135 }
136
137 if (number_type == "u8")
138 {
139 unsigned long* p = data<unsigned long>();
140 return std::vector<T>(static_cast<unsigned long*>(p),
141 static_cast<unsigned long*>(p) + num_vals);
142 }
143
144 if (number_type == "f8")
145 {
146 double* p = data<double>();
147 return std::vector<T>(static_cast<double*>(p),
148 static_cast<double*>(p) + num_vals);
149 }
150 }
151
152 size_t num_bytes() const
153 {
154 return data_holder->size();
155 }
156
157 std::shared_ptr<std::vector<char >> data_holder;
158 std::vector<size_t> shape;
159 size_t word_size;
160 bool fortran_order;
161 std::string number_type;
162 size_t num_vals;
163};
164
165using npz_t = std::map<std::string, NpyArray>;
166
167char BigEndianTest();
168char map_type(const std::type_info& t);
169template<typename T> std::vector<char> create_npy_header(
170 const std::vector<size_t>& shape);
171void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape,
172 bool& fortran_order, std::string& number_type);
173void parse_npy_header(unsigned char* buffer, size_t& word_size,
174 std::vector<size_t>& shape, bool& fortran_order, std::string& number_type);
175void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
176 size_t& global_header_offset);
177npz_t npz_load(std::string fname);
178NpyArray npz_load(std::string fname, std::string varname);
179NpyArray npy_load(std::string fname);
180
181template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs,
182 const T rhs)
183{
184 //write in little endian
185 for (size_t byte = 0; byte < sizeof(T); byte++)
186 {
187 char val = * ((char*) & rhs + byte);
188 lhs.push_back(val);
189 }
190
191 return lhs;
192}
193
194template<> std::vector<char>& operator+=(std::vector<char>& lhs,
195 const std::string rhs);
196template<> std::vector<char>& operator+=(std::vector<char>& lhs,
197 const char* rhs);
198
199
200template<typename T> void npy_save(std::string fname, const T* data,
201 const std::vector<size_t> shape, std::string mode = "w")
202{
203 FILE* fp = NULL;
204 std::vector<size_t>
205 true_data_shape; //if appending, the shape of existing + new data
206
207 if (mode == "a")
208 {
209 fp = fopen(fname.c_str(), "r+b");
210 }
211
212 if (fp)
213 {
214 //file exists. we need to append to it. read the header, modify the array size
215 size_t word_size;
216 bool fortran_order;
217 std::string number_type;
218 parse_npy_header(fp, word_size, true_data_shape, fortran_order, number_type);
219 assert(!fortran_order);
220
221 if (word_size != sizeof(T))
222 {
223 Foam::Info << "libnpy error: " << fname << " has word size " << word_size <<
224 " but npy_save appending data sized " << sizeof(T) << "\n";
225 assert( word_size == sizeof(T) );
226 }
227
228 if (true_data_shape.size() != shape.size())
229 {
230 Foam::Info <<
231 "libnpy error: npy_save attempting to append misdimensioned data to " << fname
232 << "\n";
233 assert(true_data_shape.size() != shape.size());
234 }
235
236 for (size_t i = 1; i < shape.size(); i++)
237 {
238 if (shape[i] != true_data_shape[i])
239 {
240 Foam::Info << "libnpy error: npy_save attempting to append misshaped data to " <<
241 fname << "\n";
242 assert(shape[i] == true_data_shape[i]);
243 }
244 }
245
246 true_data_shape[0] += shape[0];
247 }
248 else
249 {
250 fp = fopen(fname.c_str(), "wb");
251 true_data_shape = shape;
252 }
253
254 std::vector<char> header = create_npy_header<T>(true_data_shape);
255 size_t nels = std::accumulate(shape.begin(), shape.end(), 1,
256 std::multiplies<size_t>());
257 fseek(fp, 0, SEEK_SET);
258 fwrite(& header[0], sizeof(char), header.size(), fp);
259 fseek(fp, 0, SEEK_END);
260 fwrite(data, sizeof(T), nels, fp);
261 fclose(fp);
262}
263
264template<typename T> void npz_save(std::string zipname, std::string fname,
265 const T* data, const std::vector<size_t>& shape, std::string mode = "w",
266 bool compress = false)
267{
268 //first, append a .npy to the fname
269 fname += ".npy";
270 //now, on with the show
271 FILE* fp = NULL;
272 uint16_t nrecs = 0;
273 size_t global_header_offset = 0;
274 std::vector<char> global_header;
275
276 if (mode == "a")
277 {
278 fp = fopen(zipname.c_str(), "r+b");
279 }
280
281 if (fp)
282 {
283 //zip file exists. we need to add a new npy file to it.
284 //first read the footer. this gives us the offset and size of the global header
285 //then read and store the global header.
286 //below, we will write the the new data at the start of the global header then append the global header and footer below it
287 size_t global_header_size;
288 parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
289 fseek(fp, global_header_offset, SEEK_SET);
290 global_header.resize(global_header_size);
291 size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
292
293 if (res != global_header_size)
294 {
295 throw std::runtime_error("npz_save: header read error while adding to existing zip");
296 }
297
298 fseek(fp, global_header_offset, SEEK_SET);
299 }
300 else
301 {
302 fp = fopen(zipname.c_str(), "wb");
303 }
304
305 std::vector<char> npy_header = create_npy_header<T>(shape);
306 size_t nels = std::accumulate(shape.begin(), shape.end(), 1,
307 std::multiplies<size_t>());
308 size_t nbytes_uncompressed = nels * sizeof(T) + npy_header.size();
309 // Prepare data and compression parameters
310 std::vector<uint8_t> buffer_compressed;
311 size_t nbytes_on_disk;
312 uint16_t compression_method;
313 uint32_t crc;
314
315 if (compress)
316 {
317 // Create uncompressed buffer (header + data)
318 std::vector<uint8_t> uncompressed(nbytes_uncompressed);
319 memcpy(&uncompressed[0], &npy_header[0], npy_header.size());
320 memcpy(&uncompressed[npy_header.size()], data, nels * sizeof(T));
321 // Get CRC of uncompressed data
322 crc = crc32(0L, &uncompressed[0], nbytes_uncompressed);
323 // Compress using zlib deflate (raw deflate, no zlib/gzip header)
324 uLongf max_compressed_size = compressBound(nbytes_uncompressed);
325 buffer_compressed.resize(max_compressed_size);
326 z_stream strm;
327 strm.zalloc = Z_NULL;
328 strm.zfree = Z_NULL;
329 strm.opaque = Z_NULL;
330 int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8,
331 Z_DEFAULT_STRATEGY);
332
333 if (ret != Z_OK)
334 {
335 fclose(fp);
336 throw std::runtime_error("npz_save: deflateInit2 failed");
337 }
338
339 strm.avail_in = nbytes_uncompressed;
340 strm.next_in = &uncompressed[0];
341 strm.avail_out = max_compressed_size;
342 strm.next_out = &buffer_compressed[0];
343 ret = deflate(&strm, Z_FINISH);
344
345 if (ret != Z_STREAM_END)
346 {
347 deflateEnd(&strm);
348 fclose(fp);
349 throw std::runtime_error("npz_save: deflate failed");
350 }
351
352 nbytes_on_disk = strm.total_out;
353 deflateEnd(&strm);
354 compression_method = 8; // deflate
355 }
356 else
357 {
358 // No compression - CRC computed in two parts
359 crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
360 crc = crc32(crc, (uint8_t*)data, nels * sizeof(T));
361 nbytes_on_disk = nbytes_uncompressed;
362 compression_method = 0; // store
363 }
364
365 //build the local header
366 std::vector<char> local_header;
367 local_header += "PK"; //first part of sig
368 local_header += (uint16_t) 0x0403; //second part of sig
369 local_header += (uint16_t) 20; //min version to extract
370 local_header += (uint16_t) 0; //general purpose bit flag
371 local_header += (uint16_t) compression_method; //compression method
372 local_header += (uint16_t) 0; //file last mod time
373 local_header += (uint16_t) 0; //file last mod date
374 local_header += (uint32_t) crc; //crc
375 local_header += (uint32_t) nbytes_on_disk; //compressed size
376 local_header += (uint32_t) nbytes_uncompressed; //uncompressed size
377 local_header += (uint16_t) fname.size(); //fname length
378 local_header += (uint16_t) 0; //extra field length
379 local_header += fname;
380 //build global header
381 global_header += "PK"; //first part of sig
382 global_header += (uint16_t) 0x0201; //second part of sig
383 global_header += (uint16_t) 20; //version made by
384 global_header.insert(global_header.end(), local_header.begin() + 4,
385 local_header.begin() + 30);
386 global_header += (uint16_t) 0; //file comment length
387 global_header += (uint16_t) 0; //disk number where file starts
388 global_header += (uint16_t) 0; //internal file attributes
389 global_header += (uint32_t) 0; //external file attributes
390 global_header += (uint32_t)
391 global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
392 global_header += fname;
393 //build footer
394 std::vector<char> footer;
395 footer += "PK"; //first part of sig
396 footer += (uint16_t) 0x0605; //second part of sig
397 footer += (uint16_t) 0; //number of this disk
398 footer += (uint16_t) 0; //disk where footer starts
399 footer += (uint16_t) (nrecs + 1); //number of records on this disk
400 footer += (uint16_t) (nrecs + 1); //total number of records
401 footer += (uint32_t) global_header.size(); //nbytes of global headers
402 footer += (uint32_t) (global_header_offset + nbytes_on_disk +
403 local_header.size()); //offset of start of global headers, since global header now starts after newly written array
404 footer += (uint16_t) 0; //zip file comment length
405 //write everything
406 fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
407
408 if (compress)
409 {
410 fwrite(&buffer_compressed[0], sizeof(char), nbytes_on_disk, fp);
411 }
412 else
413 {
414 fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
415 fwrite(data, sizeof(T), nels, fp);
416 }
417
418 fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
419 fwrite(&footer[0], sizeof(char), footer.size(), fp);
420 fclose(fp);
421}
422
423template<typename T> void npy_save(std::string fname, const std::vector<T> data,
424 std::string mode = "w")
425{
426 std::vector<size_t> shape;
427 shape.push_back(data.size());
428 npy_save(fname, & data[0], shape, mode);
429}
430
431template<typename T> void npz_save(std::string zipname, std::string fname,
432 const std::vector<T> data, std::string mode = "w", bool compress = false)
433{
434 std::vector<size_t> shape;
435 shape.push_back(data.size());
436 npz_save(zipname, fname, &data[0], shape, mode, compress);
437}
438
439// Convenience wrapper for compressed save
440template<typename T> void npz_save_compressed(std::string zipname,
441 std::string fname, const T* data, const std::vector<size_t>& shape,
442 std::string mode = "w")
443{
444 npz_save(zipname, fname, data, shape, mode, true);
445}
446template<typename T> void npz_save_compressed(std::string zipname,
447 std::string fname, const std::vector<T> data, std::string mode = "w")
448{
449 std::vector<size_t> shape;
450 shape.push_back(data.size());
451 npz_save(zipname, fname, &data[0], shape, mode, true);
452}
453
454template<typename T> std::vector<char> create_npy_header(
455 const std::vector<size_t>& shape)
456{
457 std::vector<char> dict;
458 dict += "{'descr': '";
459 dict += BigEndianTest();
460 dict += map_type(typeid(T));
461 dict += std::to_string(sizeof(T));
462 dict += "', 'fortran_order': False, 'shape': (";
463 dict += std::to_string(shape[0]);
464
465 for (size_t i = 1; i < shape.size(); i++)
466 {
467 dict += ", ";
468 dict += std::to_string(shape[i]);
469 }
470
471 if (shape.size() == 1)
472 {
473 dict += ",";
474 }
475
476 dict += "), }";
477 //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
478 int remainder = 16 - (10 + dict.size()) % 16;
479 dict.insert(dict.end(), remainder, ' ');
480 dict.back() = '\n';
481 std::vector<char> header;
482 header += (char) 0x93;
483 header += "NUMPY";
484 header += (char) 0x01; //major version of numpy format
485 header += (char) 0x00; //minor version of numpy format
486 header += (uint16_t) dict.size();
487 header.insert(header.end(), dict.begin(), dict.end());
488 return header;
489}
490
491template<typename T, int dim>
492Eigen::Matrix < T, -1, dim > load(Eigen::Matrix < T, -1, dim > & mat,
493 const std::string fname);
494
495template<typename T>
496Eigen::SparseMatrix<T> load(Eigen::SparseMatrix<T>& mat,
497 const std::string fname);
498
499template<typename T>
500Eigen::Tensor<T, 3> load(Eigen::Tensor<T, 3>& tens,
501 const std::string fname);
502
503template<typename T, int dim>
504void save(const Eigen::Matrix < T, -1, dim > & mat, const std::string fname);
505
506template<typename T>
507void save(const Eigen::Tensor<T, 3>& tensor, const std::string fname);
508
509template<typename T>
510void save(const Eigen::SparseMatrix<T>& mat, const std::string fname);
511
512cnpy::NpyArray load_the_npy_file(FILE* fp);
513cnpy::NpyArray load_the_npy_file(std::string fname);
514
515}
516
517#endif