MMTF-C++
The C++ language MMTF libraries
Loading...
Searching...
No Matches
binary_decoder.hpp
Go to the documentation of this file.
1// *************************************************************************
2//
3// Licensed under the MIT License (see accompanying LICENSE file).
4//
5// The authors of this code are: Gerardo Tauriello, and Daniel Farrell.
6//
7// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
8// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
9// Gazal Kalyan, Alexander Rose
10//
11// *************************************************************************
12
13#ifndef MMTF_BINARY_DECODER_H
14#define MMTF_BINARY_DECODER_H
15
16#include "structure_data.hpp"
17#include "errors.hpp"
18
19#include <msgpack.hpp>
20#include <cstring> // low level mem
21#include <sstream>
22#include <limits>
23#include <algorithm>
24
25namespace mmtf {
26
31public:
41 BinaryDecoder(const msgpack::object& obj,
42 const std::string& key = "UNNAMED_BINARY");
43
53 BinaryDecoder(const std::string& str,
54 const std::string& key = "UNNAMED_BINARY");
55
71 template<typename T>
72 void decode(T& target) const;
73
74private:
75 // for error reporting
76 std::string key_;
77 // data from binary header
78 int32_t strategy_;
79 int32_t length_;
80 int32_t parameter_;
81 const char* encodedData_;
82 uint32_t encodedDataLength_; // max. size for binary is 2^32 - 1
83
84 // helper function for constructors
85 void
86 initFromData(const char * str_data,
87 const std::size_t len);
88
89 // check length consistency (throws)
90 void checkLength_(int32_t exp_length) const;
91 // check if binary data is divisible by x (throws)
92 void checkDivisibleBy_(int32_t item_size) const;
93
94 // byte decoders
95 void decodeFromBytes_(std::vector<float>& output) const;
96 void decodeFromBytes_(std::vector<int8_t>& output) const;
97 void decodeFromBytes_(std::vector<int16_t>& output) const;
98 void decodeFromBytes_(std::vector<int32_t>& output) const;
99 // special one: decode to vector of strings
100 void decodeFromBytes_(std::vector<std::string>& output) const;
101
102 // run length decoding
103 // -> Int and IntOut can be any integer types
104 // -> Int values are blindly converted to IntOut
105 template<typename Int, typename IntOut>
106 void runLengthDecode_(const std::vector<Int>& input,
107 std::vector<IntOut>& output) const;
108
109 // delta decoding -> Int can be any integer type
110 template<typename Int>
111 void deltaDecode_(const std::vector<Int>& input, std::vector<Int>& output) const;
112 // variant doing it in-place
113 template<typename Int>
114 void deltaDecode_(std::vector<Int>& in_out) const;
115
116 // recursive indexing decode -> SmallInt must be smaller than Int
117 template<typename SmallInt, typename Int>
118 void recursiveIndexDecode_(const std::vector<SmallInt>& input,
119 std::vector<Int>& output) const;
120
121 // decode integer to float -> Int can be any integer type
122 template<typename Int>
123 void decodeDivide_(const std::vector<Int>& input, float const divisor,
124 std::vector<float>& output) const;
125};
126
127// *************************************************************************
128// IMPLEMENTATION
129// *************************************************************************
130
131// helpers in anonymous namespace (only visible in this file)
132namespace {
133
134// byteorder functions ("ntohl" etc.)
135#ifdef WIN32
136#include <winsock2.h>
137#else
138#include <arpa/inet.h>
139#endif
140
141#ifndef __EMSCRIPTEN__
// Reads 4 bytes in big-endian (network) order from src and writes the
// corresponding host-order 32-bit value to dst.
// memcpy is used on both sides so neither pointer needs to be aligned.
void assignBigendian4(void* dst, const char* src) {
  uint32_t network_order;
  std::memcpy(&network_order, src, sizeof(network_order));
  const uint32_t host_order = ntohl(network_order);
  std::memcpy(dst, &host_order, sizeof(host_order));
}
148
// Reads 2 bytes in big-endian (network) order from src and writes the
// corresponding host-order 16-bit value to dst.
// memcpy is used on both sides so neither pointer needs to be aligned.
void assignBigendian2(void* dst, const char* src) {
  uint16_t network_order;
  std::memcpy(&network_order, src, sizeof(network_order));
  const uint16_t host_order = ntohs(network_order);
  std::memcpy(dst, &host_order, sizeof(host_order));
}
155#else
156// Need to avoid how emscripten handles memory
157// Note that this will only work on little endian machines, but this should not be a major
158// issue as Emscripten only supports little endian hardware.
159// see: https://kripken.github.io/emscripten-site/docs/porting/guidelines/portability_guidelines.html
160
// Copies 4 bytes from src to dst in reversed byte order, one byte at a time
// (no memcpy and no ntohl in this variant).
void assignBigendian4(void* dst, const char* src) {
  uint8_t* out = static_cast<uint8_t*>(dst);
  for (int k = 0; k < 4; ++k) {
    out[k] = src[3 - k];
  }
}
167
// Copies 2 bytes from src to dst in swapped order, one byte at a time
// (no memcpy and no ntohs in this variant).
void assignBigendian2(void* dst, const char* src) {
  uint8_t* out = static_cast<uint8_t*>(dst);
  const uint8_t high = src[0];
  const uint8_t low = src[1];
  out[0] = low;
  out[1] = high;
}
172#endif
173
174void arrayCopyBigendian4(void* dst, const char* src, size_t n) {
175 for (size_t i = 0; i < n; i += 4) {
176 assignBigendian4(((char*)dst) + i, src + i);
177 }
178}
179
180void arrayCopyBigendian2(void* dst, const char* src, size_t n) {
181 for (size_t i = 0; i < n; i += 2) {
182 assignBigendian2(((char*)dst) + i, src + i);
183 }
184}
185
186} // anon ns
187
188
189// note this does not set key_, you must set it in ctor
190inline void BinaryDecoder::initFromData(const char * bytes, std::size_t const len) {
191 assignBigendian4(&strategy_, bytes);
192 assignBigendian4(&length_, bytes + 4);
193 assignBigendian4(&parameter_, bytes + 8);
194 encodedData_ = bytes + 12;
195 encodedDataLength_ = len - 12;
196}
197
198inline BinaryDecoder::BinaryDecoder(const msgpack::object& obj,
199 const std::string& key)
200 : key_(key) {
201 // sanity checks
202 if (obj.type != msgpack::type::BIN) {
203 throw DecodeError("The '" + key + "' entry is not binary data");
204 }
205 if (obj.via.bin.size < 12) {
206 std::stringstream err;
207 err << "The '" + key + "' entry is too short " << obj.via.bin.size;
208 throw DecodeError(err.str());
209 }
210 this->initFromData(obj.via.bin.ptr, obj.via.bin.size);
211}
212
213inline BinaryDecoder::BinaryDecoder(const std::string& str,
214 const std::string& key)
215 : key_(key) {
216 this->initFromData(str.data(), str.size());
217}
218
219template<typename T>
220void BinaryDecoder::decode(T&) const {
221 throw mmtf::DecodeError("Invalid target type for binary '" + key_ + "'");
222}
223
224template<>
225inline void BinaryDecoder::decode(std::vector<float>& output) const {
226
227 // check strategy to parse
228 switch (strategy_) {
229 case 1: {
230 decodeFromBytes_(output);
231 break;
232 }
233 case 9: {
234 std::vector<int32_t> step1;
235 std::vector<int32_t> step2;
236 decodeFromBytes_(step1);
237 runLengthDecode_(step1, step2);
238 decodeDivide_(step2, static_cast<float>(parameter_), output);
239 break;
240 }
241 case 10: {
242 std::vector<int16_t> step1;
243 std::vector<int32_t> step2;
244 decodeFromBytes_(step1);
245 recursiveIndexDecode_(step1, step2);
246 deltaDecode_(step2);
247 decodeDivide_(step2, static_cast<float>(parameter_), output);
248 break;
249 }
250 case 11: {
251 std::vector<int16_t> step1;
252 decodeFromBytes_(step1);
253 decodeDivide_(step1, static_cast<float>(parameter_), output);
254 break;
255 }
256 case 12: {
257 std::vector<int16_t> step1;
258 std::vector<int32_t> step2;
259 decodeFromBytes_(step1);
260 recursiveIndexDecode_(step1, step2);
261 decodeDivide_(step2, static_cast<float>(parameter_), output);
262 break;
263 }
264 case 13: {
265 std::vector<int8_t> step1;
266 std::vector<int32_t> step2;
267 decodeFromBytes_(step1);
268 recursiveIndexDecode_(step1, step2);
269 decodeDivide_(step2, static_cast<float>(parameter_), output);
270 break;
271 }
272 default: {
273 std::stringstream err;
274 err << "Invalid strategy " << strategy_ << " for binary '" + key_
275 << "': does not decode to float array";
276 throw DecodeError(err.str());
277 }
278 }
279
280 // check size
281 checkLength_(output.size());
282}
283
284template<>
285inline void BinaryDecoder::decode(std::vector<int8_t>& output) const {
286
287 // check strategy to parse
288 switch (strategy_) {
289 case 2: {
290 decodeFromBytes_(output);
291 break;
292 }
293 case 16: {
294 std::vector<int32_t> step1;
295 decodeFromBytes_(step1);
296 runLengthDecode_(step1, output);
297 break;
298 }
299 default: {
300 std::stringstream err;
301 err << "Invalid strategy " << strategy_ << " for binary '" + key_
302 << "': does not decode to int8 array";
303 throw DecodeError(err.str());
304 }
305 }
306
307 // check size
308 checkLength_(output.size());
309}
310
311template<>
312inline void BinaryDecoder::decode(std::vector<int16_t>& output) const {
313
314 // check strategy to parse
315 switch (strategy_) {
316 case 3: {
317 decodeFromBytes_(output);
318 break;
319 }
320 default: {
321 std::stringstream err;
322 err << "Invalid strategy " << strategy_ << " for binary '" + key_
323 << "': does not decode to int16 array";
324 throw DecodeError(err.str());
325 }
326 }
327
328 // check size
329 checkLength_(output.size());
330}
331
332template<>
333inline void BinaryDecoder::decode(std::vector<int32_t>& output) const {
334
335 // check strategy to parse
336 switch (strategy_) {
337 case 4: {
338 decodeFromBytes_(output);
339 break;
340 }
341 case 7: {
342 std::vector<int32_t> step1;
343 decodeFromBytes_(step1);
344 runLengthDecode_(step1, output);
345 break;
346 }
347 case 8: {
348 std::vector<int32_t> step1;
349 decodeFromBytes_(step1);
350 runLengthDecode_(step1, output);
351 deltaDecode_(output);
352 break;
353 }
354 case 14: {
355 std::vector<int16_t> step1;
356 decodeFromBytes_(step1);
357 recursiveIndexDecode_(step1, output);
358 break;
359 }
360 case 15: {
361 std::vector<int8_t> step1;
362 decodeFromBytes_(step1);
363 recursiveIndexDecode_(step1, output);
364 break;
365 }
366 default: {
367 std::stringstream err;
368 err << "Invalid strategy " << strategy_ << " for binary '" + key_
369 << "': does not decode to int32 array";
370 throw DecodeError(err.str());
371 }
372 }
373
374 // check size
375 checkLength_(output.size());
376}
377
378template<>
379inline void BinaryDecoder::decode(std::vector<std::string>& output) const {
380
381 // check strategy to parse
382 switch (strategy_) {
383 case 5: {
384 decodeFromBytes_(output);
385 break;
386 }
387 default: {
388 std::stringstream err;
389 err << "Invalid strategy " << strategy_ << " for binary '" + key_
390 << "': does not decode to string array";
391 throw DecodeError(err.str());
392 }
393 }
394
395 // check size
396 checkLength_(output.size());
397}
398
399template<>
400inline void BinaryDecoder::decode(std::vector<char>& output) const {
401
402 // check strategy to parse
403 switch (strategy_) {
404 case 6: {
405 std::vector<int32_t> step1;
406 decodeFromBytes_(step1);
407 runLengthDecode_(step1, output);
408 break;
409 }
410 default: {
411 std::stringstream err;
412 err << "Invalid strategy " << strategy_ << " for binary '" + key_
413 << "': does not decode to string array";
414 throw DecodeError(err.str());
415 }
416 }
417
418 // check size
419 checkLength_(output.size());
420}
421
422// checks
423inline void BinaryDecoder::checkLength_(int32_t exp_length) const {
424 if (length_ != exp_length) {
425 std::stringstream err;
426 err << "Length mismatch for binary '" + key_ + "': "
427 << length_ << " vs " << exp_length;
428 throw DecodeError(err.str());
429 }
430}
431
432inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) const {
433 if (encodedDataLength_ % item_size != 0) {
434 std::stringstream err;
435 err << "Binary length of '" + key_ + "': "
436 << encodedDataLength_ << " is not a multiple of " << item_size;
437 throw DecodeError(err.str());
438 }
439}
440
441// byte decoders
442inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) const {
443 checkDivisibleBy_(4);
444 // prepare memory
445 output.resize(encodedDataLength_ / 4);
446 // get data
447 if(!output.empty()) {
448 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
449 }
450}
451inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) const {
452 // prepare memory
453 output.resize(encodedDataLength_);
454 // get data
455 if (!output.empty()) {
456 memcpy(&output[0], encodedData_, encodedDataLength_);
457 }
458}
459inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) const {
460 checkDivisibleBy_(2);
461 // prepare memory
462 output.resize(encodedDataLength_ / 2);
463 // get data
464 if (!output.empty()) {
465 arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
466 }
467}
468inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) const {
469 checkDivisibleBy_(4);
470 // prepare memory
471 output.resize(encodedDataLength_ / 4);
472 // get data
473 if (!output.empty()) {
474 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
475 }
476}
477// special one: decode to vector of strings
478inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) const {
479 char NULL_BYTE = 0x00;
480 // check parameter
481 const int32_t str_len = parameter_;
482 checkDivisibleBy_(str_len);
483 // prepare memory
484 output.resize(encodedDataLength_ / str_len);
485 // get data
486 for (size_t i = 0; i < output.size(); ++i) {
487 output[i].assign(encodedData_ + i * str_len, str_len);
488 output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
489 }
490}
491
492// run length decoding
493template<typename Int, typename IntOut>
494void BinaryDecoder::runLengthDecode_(const std::vector<Int>& input,
495 std::vector<IntOut>& output) const {
496 // we work with pairs of numbers
497 checkDivisibleBy_(2);
498 // find out size of resulting vector (for speed)
499 size_t out_size = 0;
500 for (size_t i = 0; i < input.size(); i += 2) {
501 out_size += input[i + 1];
502 }
503 // reserve space (for speed)
504 output.clear();
505 output.reserve(out_size);
506 // get data
507 for (size_t i = 0; i < input.size(); i += 2) {
508 const IntOut value = IntOut(input[i]);
509 const Int number = input[i+1];
510 for (Int j = 0; j < number; ++j) {
511 output.push_back(value);
512 }
513 }
514}
515
516// delta decoding
517template<typename Int>
518void BinaryDecoder::deltaDecode_(const std::vector<Int>& input,
519 std::vector<Int>& output) const {
520 // reserve space (for speed)
521 output.clear();
522 if (input.empty()) return; // ensure we have some values
523 output.reserve(input.size());
524 // get data
525 output.push_back(input[0]);
526 for (size_t i = 1; i < input.size(); ++i) {
527 output.push_back(output[i - 1] + input[i]);
528 }
529}
530template<typename Int>
531void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) const {
532 for (size_t i = 1; i < in_out.size(); ++i) {
533 in_out[i] = in_out[i - 1] + in_out[i];
534 }
535}
536
537// recursive indexing decode
538template<typename SmallInt, typename Int>
539void BinaryDecoder::recursiveIndexDecode_(const std::vector<SmallInt>& input,
540 std::vector<Int>& output) const {
541 // get limits
542 const SmallInt min_int = std::numeric_limits<SmallInt>::min();
543 const SmallInt max_int = std::numeric_limits<SmallInt>::max();
544 // find out size of resulting vector (for speed)
545 size_t out_size = 0;
546 for (size_t i = 0; i < input.size(); ++i) {
547 if (input[i] != min_int && input[i] != max_int) ++out_size;
548 }
549 // reserve space (for speed)
550 output.clear();
551 output.reserve(out_size);
552 // get data
553 Int cur_val = 0;
554 for (size_t i = 0; i < input.size(); ++i) {
555 cur_val += input[i];
556 if (input[i] != min_int && input[i] != max_int) {
557 output.push_back(cur_val);
558 cur_val = 0;
559 }
560 }
561}
562
563// decode integer to float
564template<typename Int>
565void BinaryDecoder::decodeDivide_(const std::vector<Int>& input, float const divisor,
566 std::vector<float>& output) const {
567 // reserve space and get inverted divisor (for speed)
568 output.clear();
569 output.reserve(input.size());
570 float inv_div = float(1) / divisor;
571 // get data
572 for (size_t i = 0; i < input.size(); ++i) {
573 output.push_back(float(input[i]) * inv_div);
574 }
575}
576
577} // mmtf namespace
578
579#endif
BinaryDecoder(const msgpack::object &obj, const std::string &key="UNNAMED_BINARY")
Initialize object given a msgpack object. Reads out binary header to prepare for call of decode.
Definition binary_decoder.hpp:198
void decode(T &target) const
Decode binary msgpack object into the given target.
Definition binary_decoder.hpp:220
Exception thrown when failing during decoding.
Definition errors.hpp:23
Definition binary_decoder.hpp:25