MMTF-C++
The C++ language MMTF libraries
Loading...
Searching...
No Matches
binary_encoder.hpp
Go to the documentation of this file.
1// *************************************************************************
2//
3// Licensed under the MIT License (see accompanying LICENSE file).
4//
5// The author of this code is: Daniel Farrell
6//
7// Based on mmtf_python, adapted to c++ standards 2018
8//
9// *************************************************************************
10
11
12#ifndef MMTF_BINARY_ENCODER_H
13#define MMTF_BINARY_ENCODER_H
14#include <math.h>
15#include <vector>
16#include <string>
17#include <sstream>
#include <stdexcept>
18
19// byteorder functions
20#ifdef WIN32
21#include <winsock2.h>
22#else
23#include <arpa/inet.h>
24#endif
25
26namespace mmtf {
27
28// *************************************************************************
29// PRIVATE FUNCTIONS (only visible in this header)
30// *************************************************************************
31
32namespace { // private helpers
33
/**
 * Multiply every float by `multiplier`, round the product and store it as
 * an int32_t.
 * @param vec_in      values to convert
 * @param multiplier  factor applied to each value before rounding
 * @return one int32_t per input element, in input order
 */
40inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
41 int multiplier);
42
/**
 * Delta-encode a vector: the first output element is the first input
 * element, every later output element is the difference between an input
 * element and its predecessor.
 * @param vec_in  values to encode
 * @return vector of the same length as `vec_in` (empty for empty input)
 */
48inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in);
49
50
/**
 * Run-length encode a vector into a flat list of (value, count) pairs.
 * @param vec_in  values to encode; elements are compared with `==` and
 *                converted to int32_t
 * @return value/count pairs, one pair per run of equal neighbouring elements
 */
56template<typename Int>
57inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in );
58
/**
 * Recursive-index encode a vector: a value at or beyond `max` (or `min`) is
 * emitted as repeated `max` (or `min`) entries followed by the remainder.
 * @param vec_in  values to encode
 * @param max     upper limit emitted for non-negative values
 * @param min     lower limit emitted for negative values
 * @return encoded values, at least as many as in `vec_in`
 */
66inline std::vector<int32_t> recursiveIndexEncode(std::vector<int32_t> const & vec_in,
67 int max=32767, int min=-32768);
68
/**
 * Write the three 32-bit header fields to `ss` in the order codec,
 * array_size, param, each converted with htonl() (network byte order).
 * @param ss          stream receiving the 12 header bytes
 * @param array_size  element count stored in the header
 * @param codec       codec identifier stored in the header
 * @param param       codec parameter stored in the header
 */
76inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param=0);
77
/**
 * Copy the current contents of a stringstream into a vector of chars.
 * @param ss  stream to read (via `ss.str()`)
 * @return the stream contents, byte for byte
 */
83inline std::vector<char> stringstreamToCharVector(std::stringstream & ss);
84
85} // anon ns
86
87// *************************************************************************
88// PUBLIC FUNCTIONS
89// *************************************************************************
90
/**
 * Encode 8-bit integers as raw bytes (header codec 2).
 * @param vec_in  values to encode
 * @return header followed by one byte per value
 */
95inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in);
96
/**
 * Encode 32-bit integers as four bytes each, converted with htonl()
 * (header codec 4).
 * @param vec_in  values to encode
 * @return header followed by four bytes per value
 */
101inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in);
102
/**
 * Encode strings as fixed-width fields (header codec 5, header parameter
 * CHAIN_LEN). Each string is followed by NUL bytes up to CHAIN_LEN
 * characters; strings are expected to be at most CHAIN_LEN characters long.
 * @param in_sv      strings to encode
 * @param CHAIN_LEN  width of every field in bytes
 * @return header followed by the padded strings
 */
108inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN);
109
110
/**
 * Run-length encode chars (header codec 6); the value/count pairs are
 * written as 32-bit integers converted with htonl().
 * @param in_cv  characters to encode
 * @return header followed by the encoded pairs
 */
115inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv);
116
117
/**
 * Delta-encode, then run-length encode 32-bit integers (header codec 8);
 * the result is written as 32-bit integers converted with htonl().
 * @param int_vec  values to encode
 * @return header followed by the encoded values
 */
122inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec);
123
/**
 * Convert floats to integers using `multiplier`, then run-length encode
 * them (header codec 9, header parameter `multiplier`).
 * @param floats_in   values to encode
 * @param multiplier  scale factor applied before rounding to integers
 * @return header followed by the encoded values as 32-bit integers
 */
129inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier);
130
/**
 * Convert floats to integers using `multiplier`, delta-encode them, then
 * recursive-index encode the deltas (header codec 10, header parameter
 * `multiplier`). The result is written as 16-bit integers converted with
 * htons().
 * @param floats_in   values to encode
 * @param multiplier  scale factor applied before rounding to integers
 * @return header followed by the encoded values
 */
136inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier);
137
/**
 * Run-length encode 8-bit integers (header codec 16); the value/count
 * pairs are written as 32-bit integers converted with htonl().
 * @param int8_vec  values to encode
 * @return header followed by the encoded pairs
 */
142inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec);
143
144// *************************************************************************
145// IMPLEMENTATION
146// *************************************************************************
147
148namespace { // private helpers
149
/**
 * Scale every float by `multiplier`, round the product and store it as an
 * int32_t.
 * @param vec_in      values to convert
 * @param multiplier  factor applied to each value before rounding
 * @return one int32_t per input element, in input order
 */
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
                                                int const multiplier) {
  std::vector<int32_t> scaled;
  scaled.reserve(vec_in.size());
  std::vector<float>::const_iterator it = vec_in.begin();
  while (it != vec_in.end()) {
    scaled.push_back(static_cast<int32_t>(round(*it * multiplier)));
    ++it;
  }
  return scaled;
}
158
159
/**
 * Delta-encode a vector of integers.
 *
 * The first output element equals the first input element; every following
 * output element is the difference between an input element and its
 * predecessor.
 *
 * @param vec_in  values to encode
 * @return vector with the same number of elements as `vec_in`; empty when
 *         `vec_in` is empty
 */
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in) {
  std::vector<int32_t> vec_out;
  if (vec_in.empty()) return vec_out;
  // Output length is known up front, so allocate once.
  vec_out.reserve(vec_in.size());
  vec_out.push_back(vec_in[0]);
  // Index with size_t: casting size() to int would truncate for very large
  // inputs and silently drop elements.
  for (size_t i = 1; i < vec_in.size(); ++i) {
    vec_out.push_back(vec_in[i] - vec_in[i-1]);
  }
  return vec_out;
}
169
170
/**
 * Run-length encode a vector into a flat list of (value, count) pairs.
 *
 * Neighbouring elements that compare equal to the first element of their
 * run are collapsed into one pair: the value converted to int32_t, followed
 * by the length of the run.
 *
 * @tparam Int    element type of the input vector
 * @param vec_in  values to encode
 * @return value/count pairs in input order; empty when `vec_in` is empty
 */
template<typename Int>
inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in ) {
  std::vector<int32_t> pairs;
  typename std::vector<Int>::const_iterator it = vec_in.begin();
  typename std::vector<Int>::const_iterator const last = vec_in.end();
  while (it != last) {
    // Consume one whole run, always comparing against its first element.
    Int const value = *it;
    int32_t run_length = 0;
    do {
      ++run_length;
      ++it;
    } while (it != last && *it == value);
    pairs.push_back(static_cast<int32_t>(value));
    pairs.push_back(run_length);
  }
  return pairs;
}
191
192
/**
 * Recursive-index encode a vector of integers.
 *
 * A non-negative value that is >= `max` is emitted as repeated `max`
 * entries followed by the remainder; a negative value that is <= `min` is
 * emitted as repeated `min` entries followed by the remainder. A value
 * exactly equal to a limit is therefore emitted as the limit followed by 0.
 *
 * @param vec_in  values to encode
 * @param max     upper limit; must be positive, otherwise the splitting
 *                loop cannot make progress
 * @param min     lower limit; must be negative for the same reason
 * @return encoded values, at least as many as in `vec_in`
 */
inline std::vector<int32_t> recursiveIndexEncode(
    std::vector<int32_t> const & vec_in,
    int max /* =32767 */, int min /*=-32768 */) {
  std::vector<int32_t> vec_out;
  // Every input yields at least one output element.
  vec_out.reserve(vec_in.size());
  // Amount removed per emitted `min`. Same value as adding |min|, but
  // written without std::abs so it is well defined even for INT_MIN and
  // needs no <cstdlib>.
  int const min_step = (min < 0) ? min : -min;
  // Index with size_t: casting size() to int would truncate for very large
  // inputs.
  for (size_t i = 0; i < vec_in.size(); ++i) {
    int32_t x = vec_in[i];
    if (x >= 0) {
      while (x >= max) {
        vec_out.push_back(max);
        x -= max;
      }
    } else {
      while (x <= min) {
        vec_out.push_back(min);
        x -= min_step;
      }
    }
    vec_out.push_back(x);
  }
  return vec_out;
}
214
215
/**
 * Append the 12-byte array header to a stream.
 *
 * The three fields are written in the order codec, array_size, param, each
 * as a 32-bit value converted with htonl() (network byte order).
 *
 * @param ss          stream receiving the header
 * @param array_size  element count stored in the header
 * @param codec       codec identifier stored in the header
 * @param param       codec parameter stored in the header
 */
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param /* =0 */) {
  // Lay the fields out contiguously in wire order and emit them in one go.
  uint32_t const fields[3] = { htonl(codec), htonl(array_size), htonl(param) };
  ss.write(reinterpret_cast< char const * >(fields), sizeof(fields));
}
224
225
/**
 * Copy the current contents of a stringstream into a vector of chars.
 * @param ss  stream to read (via `ss.str()`)
 * @return the stream contents, byte for byte (embedded NUL bytes included)
 */
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss) {
  std::string const buffer(ss.str());
  return std::vector<char>(buffer.begin(), buffer.end());
}
231
232} // anon ns
233
234
235inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in) {
236 std::stringstream ss;
237 add_header(ss, vec_in.size(), 2, 0);
238 for (size_t i=0; i<vec_in.size(); ++i) {
239 ss.write(reinterpret_cast< char * >(&vec_in[i]), sizeof(vec_in[i]));
240 }
241 return stringstreamToCharVector(ss);
242}
243
244
245inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in) {
246 std::stringstream ss;
247 add_header(ss, vec_in.size(), 4, 0);
248 for (size_t i=0; i<vec_in.size(); ++i) {
249 int32_t be_x = htonl(vec_in[i]);
250 ss.write(reinterpret_cast< char * >(&be_x), sizeof(be_x));
251 }
252 return stringstreamToCharVector(ss);
253}
254
255
256inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN) {
257 char NULL_BYTE = 0x00;
258 std::stringstream ss;
259 add_header(ss, in_sv.size(), 5, CHAIN_LEN);
260 std::vector<char> char_vec;
261 for (size_t i=0; i<in_sv.size(); ++i) {
262 char_vec.insert(char_vec.end(), in_sv[i].begin(), in_sv[i].end());
263 for (size_t j=0; j<CHAIN_LEN-in_sv[i].size(); ++j) {
264 char_vec.push_back(NULL_BYTE);
265 }
266 }
267 for (size_t i=0; i<char_vec.size(); ++i) {
268 ss.write(reinterpret_cast< char * >(&char_vec[i]), sizeof(char_vec[i]));
269 }
270 return stringstreamToCharVector(ss);
271}
272
273
274inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv) {
275 std::stringstream ss;
276 add_header(ss, in_cv.size(), 6, 0);
277 std::vector<int32_t> int_vec = runLengthEncode(in_cv);
278 for (size_t i=0; i<int_vec.size(); ++i) {
279 int32_t temp = htonl(int_vec[i]);
280 ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
281 }
282 return stringstreamToCharVector(ss);
283}
284
285
286inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec) {
287 std::stringstream ss;
288 add_header(ss, int_vec.size(), 8, 0);
289 int_vec = deltaEncode(int_vec);
290 int_vec = runLengthEncode(int_vec);
291 for (size_t i=0; i<int_vec.size(); ++i) {
292 int32_t temp = htonl(int_vec[i]);
293 ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
294 }
295 return stringstreamToCharVector(ss);
296}
297
298inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
299 std::stringstream ss;
300 add_header(ss, floats_in.size(), 9, multiplier);
301 std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
302 int_vec = runLengthEncode(int_vec);
303 for (size_t i=0; i<int_vec.size(); ++i) {
304 int32_t temp = htonl(int_vec[i]);
305 ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
306 }
307 return stringstreamToCharVector(ss);
308}
309
310
311inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
312 std::stringstream ss;
313 add_header(ss, floats_in.size(), 10, multiplier);
314 std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
315 int_vec = deltaEncode(int_vec);
316 int_vec = recursiveIndexEncode(int_vec);
317 for (size_t i=0; i<int_vec.size(); ++i) {
318 int16_t temp = htons(int_vec[i]);
319 ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
320 }
321 return stringstreamToCharVector(ss);
322}
323
324
325inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec) {
326 std::stringstream ss;
327 add_header(ss, int8_vec.size(), 16, 0);
328 std::vector<int32_t> const int_vec = runLengthEncode(int8_vec);
329 for (size_t i=0; i<int_vec.size(); ++i) {
330 int32_t temp = htonl(int_vec[i]);
331 ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
332 }
333 return stringstreamToCharVector(ss);
334}
335
336} // mmtf namespace
337#endif
Definition binary_decoder.hpp:25
std::vector< char > encodeDeltaRecursiveFloat(std::vector< float > const &floats_in, int32_t const multiplier)
Definition binary_encoder.hpp:311
std::vector< char > encodeRunLengthChar(std::vector< char > const &in_cv)
Definition binary_encoder.hpp:274
std::vector< char > encodeInt8ToByte(std::vector< int8_t > vec_in)
Definition binary_encoder.hpp:235
std::vector< char > encodeRunLengthFloat(std::vector< float > const &floats_in, int32_t const multiplier)
Definition binary_encoder.hpp:298
std::vector< char > encodeFourByteInt(std::vector< int32_t > const &vec_in)
Definition binary_encoder.hpp:245
std::vector< char > encodeRunLengthInt8(std::vector< int8_t > const &int8_vec)
Definition binary_encoder.hpp:325
std::vector< char > encodeStringVector(std::vector< std::string > const &in_sv, int32_t const CHAIN_LEN)
Definition binary_encoder.hpp:256
std::vector< char > encodeRunLengthDeltaInt(std::vector< int32_t > int_vec)
Definition binary_encoder.hpp:286