tesseract  4.1.1
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 //
6 // (C) Copyright 2007, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 //
19 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
20 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
21 
22 #include <algorithm>
23 #include <cassert>
24 #include <climits> // for LONG_MAX
25 #include <cstdint> // for uint32_t
26 #include <cstdio>
27 #include <cstdlib>
28 
29 #include "helpers.h"
30 #include "serialis.h"
31 #include "strngs.h"
32 #include "tesscallback.h"
33 
34 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
35 // provides automatic deletion of pointers, [De]Serialize that works, and
36 // sort that works.
37 template <typename T>
38 class GenericVector {
39  public:
42  }
43  GenericVector(int size, const T& init_val) {
44  init(size);
45  init_to_size(size, init_val);
46  }
47 
48  // Copy
49  GenericVector(const GenericVector& other) {
50  this->init(other.size());
51  this->operator+=(other);
52  }
55 
57 
58  // Reserve some memory.
59  void reserve(int size);
60  // Double the size of the internal array.
61  void double_the_size();
62 
63  // Resizes to size and sets all values to t.
64  void init_to_size(int size, const T& t);
65  // Resizes to size without any initialization.
66  void resize_no_init(int size) {
67  reserve(size);
68  size_used_ = size;
69  }
70 
71  // Return the size used.
72  int size() const {
73  return size_used_;
74  }
75  // Workaround to avoid g++ -Wsign-compare warnings.
76  size_t unsigned_size() const {
77  static_assert(sizeof(size_used_) <= sizeof(size_t),
78  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
79  assert(0 <= size_used_);
80  return static_cast<size_t>(size_used_);
81  }
82  int size_reserved() const {
83  return size_reserved_;
84  }
85 
86  int length() const {
87  return size_used_;
88  }
89 
90  // Return true if empty.
91  bool empty() const {
92  return size_used_ == 0;
93  }
94 
95  // Return the object from an index.
96  T& get(int index) const;
97  T& back() const;
98  T& operator[](int index) const;
99  // Returns the last object and removes it.
100  T pop_back();
101 
102  // Return the index of the T object.
103  // This method NEEDS a compare_callback to be passed to
104  // set_compare_callback.
105  int get_index(const T& object) const;
106 
107  // Return true if T is in the array
108  bool contains(const T& object) const;
109 
110  // Return true if the index is valid
111  T contains_index(int index) const;
112 
113  // Push an element in the end of the array
114  int push_back(T object);
115  void operator+=(const T& t);
116 
117  // Push an element in the end of the array if the same
118  // element is not already contained in the array.
119  int push_back_new(const T& object);
120 
121  // Push an element in the front of the array
122  // Note: This function is O(n)
123  int push_front(const T& object);
124 
125  // Set the value at the given index
126  void set(const T& t, int index);
127 
128  // Insert t at the given index, push other elements to the right.
129  void insert(const T& t, int index);
130 
131  // Removes an element at the given index and
132  // shifts the remaining elements to the left.
133  void remove(int index);
134 
135  // Truncates the array to the given size by removing the end.
136  // If the current size is less, the array is not expanded.
137  void truncate(int size) {
138  if (size < size_used_) {
139  size_used_ = size;
140  }
141  }
142 
143  // Add a callback to be called to delete the elements when the array took
144  // their ownership.
146  clear_cb_ = cb;
147  }
148 
149  // Add a callback to be called to compare the elements when needed (contains,
150  // get_id, ...)
152  compare_cb_ = cb;
153  }
154 
155  // Clear the array, calling the clear callback function if any.
156  // All the owned callbacks are also deleted.
157  // If you don't want the callbacks to be deleted, before calling clear, set
158  // the callback to nullptr.
159  void clear();
160 
161  // Delete objects pointed to by data_[i]
162  void delete_data_pointers();
163 
164  // This method clears the current object, then, does a shallow copy of
165  // its argument, and finally invalidates its argument.
166  // Callbacks are moved to the current object;
167  void move(GenericVector<T>* from);
168 
169  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
170  // The callback given must be permanent since they will be called more than
171  // once. The given callback will be deleted at the end.
172  // If the callbacks are nullptr, then the data is simply read/written using
173  // fread (and swapping)/fwrite.
174  // Returns false on error or if the callback returns false.
175  // DEPRECATED. Use [De]Serialize[Classes] instead.
176  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const;
177  bool read(tesseract::TFile* f,
179  // Writes a vector of simple types to the given file. Assumes that bitwise
180  // read/write of T will work. Returns false in case of error.
181  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
182  bool Serialize(FILE* fp) const;
183  bool Serialize(tesseract::TFile* fp) const;
184  // Reads a vector of simple types from the given file. Assumes that bitwise
185  // read/write will work with ReverseN according to sizeof(T).
186  // Returns false in case of error.
187  // If swap is true, assumes a big/little-endian swap is needed.
188  // TFile is assumed to know about swapping.
189  bool DeSerialize(bool swap, FILE* fp);
190  bool DeSerialize(tesseract::TFile* fp);
191  // Skips the deserialization of the vector.
192  static bool SkipDeSerialize(tesseract::TFile* fp);
193  // Writes a vector of classes to the given file. Assumes the existence of
194  // bool T::Serialize(FILE* fp) const that returns false in case of error.
195  // Returns false in case of error.
196  bool SerializeClasses(FILE* fp) const;
197  bool SerializeClasses(tesseract::TFile* fp) const;
198  // Reads a vector of classes from the given file. Assumes the existence of
199  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
200  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
201  // this function. Returns false in case of error.
202  // If swap is true, assumes a big/little-endian swap is needed.
203  bool DeSerializeClasses(bool swap, FILE* fp);
205  // Calls SkipDeSerialize on the elements of the vector.
206  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
207 
208  // Allocates a new array of double the current_size, copies over the
209  // information from data to the new location, deletes data and returns
210  // the pointed to the new larger array.
211  // This function uses memcpy to copy the data, instead of invoking
212  // operator=() for each element like double_the_size() does.
213  static T* double_the_size_memcpy(int current_size, T* data) {
214  T* data_new = new T[current_size * 2];
215  memcpy(data_new, data, sizeof(T) * current_size);
216  delete[] data;
217  return data_new;
218  }
219 
220  // Reverses the elements of the vector.
221  void reverse() {
222  for (int i = 0; i < size_used_ / 2; ++i) {
223  Swap(&data_[i], &data_[size_used_ - 1 - i]);
224  }
225  }
226 
227  // Sorts the members of this vector using the less than comparator (cmp_lt),
228  // which compares the values. Useful for GenericVectors to primitive types.
229  // Will not work so great for pointers (unless you just want to sort some
230  // pointers). You need to provide a specialization to sort_cmp to use
231  // your type.
232  void sort();
233 
234  // Sort the array into the order defined by the qsort function comparator.
235  // The comparator function is as defined by qsort, ie. it receives pointers
236  // to two Ts and returns negative if the first element is to appear earlier
237  // in the result and positive if it is to appear later, with 0 for equal.
238  void sort(int (*comparator)(const void*, const void*)) {
239  qsort(data_, size_used_, sizeof(*data_), comparator);
240  }
241 
242  // Searches the array (assuming sorted in ascending order, using sort()) for
243  // an element equal to target and returns true if it is present.
244  // Use binary_search to get the index of target, or its nearest candidate.
245  bool bool_binary_search(const T& target) const {
246  int index = binary_search(target);
247  if (index >= size_used_) {
248  return false;
249  }
250  return data_[index] == target;
251  }
252  // Searches the array (assuming sorted in ascending order, using sort()) for
253  // an element equal to target and returns the index of the best candidate.
254  // The return value is conceptually the largest index i such that
255  // data_[i] <= target or 0 if target < the whole vector.
256  // NOTE that this function uses operator> so really the return value is
257  // the largest index i such that data_[i] > target is false.
258  int binary_search(const T& target) const {
259  int bottom = 0;
260  int top = size_used_;
261  while (top - bottom > 1) {
262  int middle = (bottom + top) / 2;
263  if (data_[middle] > target) {
264  top = middle;
265  } else {
266  bottom = middle;
267  }
268  }
269  return bottom;
270  }
271 
272  // Compact the vector by deleting elements using operator!= on basic types.
273  // The vector must be sorted.
274  void compact_sorted() {
275  if (size_used_ == 0) {
276  return;
277  }
278 
279  // First element is in no matter what, hence the i = 1.
280  int last_write = 0;
281  for (int i = 1; i < size_used_; ++i) {
282  // Finds next unique item and writes it.
283  if (data_[last_write] != data_[i]) {
284  data_[++last_write] = data_[i];
285  }
286  }
287  // last_write is the index of a valid data cell, so add 1.
288  size_used_ = last_write + 1;
289  }
290 
291  // Compact the vector by deleting elements for which delete_cb returns
292  // true. delete_cb is a permanent callback and will be deleted.
294  int new_size = 0;
295  int old_index = 0;
296  // Until the callback returns true, the elements stay the same.
297  while (old_index < size_used_ && !delete_cb->Run(old_index++)) {
298  ++new_size;
299  }
300  // Now just copy anything else that gets false from delete_cb.
301  for (; old_index < size_used_; ++old_index) {
302  if (!delete_cb->Run(old_index)) {
303  data_[new_size++] = data_[old_index];
304  }
305  }
306  size_used_ = new_size;
307  delete delete_cb;
308  }
309 
310  T dot_product(const GenericVector<T>& other) const {
311  T result = static_cast<T>(0);
312  for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i) {
313  result += data_[i] * other.data_[i];
314  }
315  return result;
316  }
317 
318  // Returns the index of what would be the target_index_th item in the array
319  // if the members were sorted, without actually sorting. Members are
320  // shuffled around, but it takes O(n) time.
321  // NOTE: uses operator< and operator== on the members.
322  int choose_nth_item(int target_index) {
323  // Make sure target_index is legal.
324  if (target_index < 0) {
325  target_index = 0; // ensure legal
326  } else if (target_index >= size_used_) {
327  target_index = size_used_ - 1;
328  }
329  unsigned int seed = 1;
330  return choose_nth_item(target_index, 0, size_used_, &seed);
331  }
332 
333  // Swaps the elements with the given indices.
334  void swap(int index1, int index2) {
335  if (index1 != index2) {
336  T tmp = data_[index1];
337  data_[index1] = data_[index2];
338  data_[index2] = tmp;
339  }
340  }
341  // Returns true if all elements of *this are within the given range.
342  // Only uses operator<
343  bool WithinBounds(const T& rangemin, const T& rangemax) const {
344  for (int i = 0; i < size_used_; ++i) {
345  if (data_[i] < rangemin || rangemax < data_[i]) {
346  return false;
347  }
348  }
349  return true;
350  }
351 
352  protected:
353  // Internal recursive version of choose_nth_item.
354  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
355 
356  // Init the object, allocating size memory.
357  void init(int size);
358 
359  // We are assuming that the object generally placed in the
360  // vector are small enough that for efficiency it makes sense
361  // to start with a larger initial size.
362  static const int kDefaultVectorSize = 4;
363  int32_t size_used_{};
364  int32_t size_reserved_{};
365  T* data_;
367  // Mutable because Run method is not const
369 };
370 
371 namespace tesseract {
372 
373 // The default FileReader loads the whole file into the vector of char,
374 // returning false on error.
375 inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
376  bool result = false;
377  FILE* fp = fopen(filename, "rb");
378  if (fp != nullptr) {
379  fseek(fp, 0, SEEK_END);
380  auto size = std::ftell(fp);
381  fseek(fp, 0, SEEK_SET);
382  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
383  if (size > 0 && size < LONG_MAX) {
384  // reserve an extra byte in case caller wants to append a '\0' character
385  data->reserve(size + 1);
386  data->resize_no_init(size);
387  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
388  }
389  fclose(fp);
390  }
391  return result;
392 }
393 
394 inline bool LoadDataFromFile(const STRING& filename,
395  GenericVector<char>* data) {
396  return LoadDataFromFile(filename.string(), data);
397 }
398 
399 // The default FileWriter writes the vector of char to the filename file,
400 // returning false on error.
401 inline bool SaveDataToFile(const GenericVector<char>& data,
402  const STRING& filename) {
403  FILE* fp = fopen(filename.string(), "wb");
404  if (fp == nullptr) {
405  return false;
406  }
407  bool result =
408  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
409  fclose(fp);
410  return result;
411 }
412 
413 template <typename T>
414 bool cmp_eq(T const& t1, T const& t2) {
415  return t1 == t2;
416 }
417 
418 // Used by sort()
419 // return < 0 if t1 < t2
420 // return 0 if t1 == t2
421 // return > 0 if t1 > t2
422 template <typename T>
423 int sort_cmp(const void* t1, const void* t2) {
424  const T* a = static_cast<const T*>(t1);
425  const T* b = static_cast<const T*>(t2);
426  if (*a < *b) {
427  return -1;
428  }
429  if (*b < *a) {
430  return 1;
431  }
432  return 0;
433 }
434 
435 // Used by PointerVector::sort()
436 // return < 0 if t1 < t2
437 // return 0 if t1 == t2
438 // return > 0 if t1 > t2
439 template <typename T>
440 int sort_ptr_cmp(const void* t1, const void* t2) {
441  const T* a = *static_cast<T* const*>(t1);
442  const T* b = *static_cast<T* const*>(t2);
443  if (*a < *b) {
444  return -1;
445  }
446  if (*b < *a) {
447  return 1;
448  }
449  return 0;
450 }
451 
452 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
453 // as it provides automatic deletion and correct serialization, with the
454 // corollary that all copy operations are deep copies of the pointed-to objects.
455 template <typename T>
456 class PointerVector : public GenericVector<T*> {
457  public:
459  explicit PointerVector(int size) : GenericVector<T*>(size) {}
461  // Clear must be called here, even though it is called again by the base,
462  // as the base will call the wrong clear.
463  clear();
464  }
465  // Copy must be deep, as the pointers will be automatically deleted on
466  // destruction.
467  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
468  this->init(other.size());
469  this->operator+=(other);
470  }
472  this->reserve(this->size_used_ + other.size_used_);
473  for (int i = 0; i < other.size(); ++i) {
474  this->push_back(new T(*other.data_[i]));
475  }
476  return *this;
477  }
478 
480  if (&other != this) {
481  this->truncate(0);
482  this->operator+=(other);
483  }
484  return *this;
485  }
486 
487  // Removes an element at the given index and
488  // shifts the remaining elements to the left.
489  void remove(int index) {
490  delete GenericVector<T*>::data_[index];
492  }
493 
494  // Truncates the array to the given size by removing the end.
495  // If the current size is less, the array is not expanded.
496  void truncate(int size) {
497  for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
498  delete GenericVector<T*>::data_[i];
499  }
501  }
502 
503  // Compact the vector by deleting elements for which delete_cb returns
504  // true. delete_cb is a permanent callback and will be deleted.
506  int new_size = 0;
507  int old_index = 0;
508  // Until the callback returns true, the elements stay the same.
509  while (old_index < GenericVector<T*>::size_used_ &&
510  !delete_cb->Run(GenericVector<T*>::data_[old_index++])) {
511  ++new_size;
512  }
513  // Now just copy anything else that gets false from delete_cb.
514  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
515  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
516  GenericVector<T*>::data_[new_size++] =
517  GenericVector<T*>::data_[old_index];
518  } else {
519  delete GenericVector<T*>::data_[old_index];
520  }
521  }
523  delete delete_cb;
524  }
525 
526  // Clear the array, calling the clear callback function if any.
527  // All the owned callbacks are also deleted.
528  // If you don't want the callbacks to be deleted, before calling clear, set
529  // the callback to nullptr.
530  void clear() {
533  }
534 
535  // Writes a vector of (pointers to) classes to the given file. Assumes the
536  // existence of bool T::Serialize(FILE*) const that returns false in case of
537  // error. There is no Serialize for simple types, as you would have a
538  // normal GenericVector of those.
539  // Returns false in case of error.
540  bool Serialize(FILE* fp) const {
541  int32_t used = GenericVector<T*>::size_used_;
542  if (fwrite(&used, sizeof(used), 1, fp) != 1) {
543  return false;
544  }
545  for (int i = 0; i < used; ++i) {
546  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
547  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
548  return false;
549  }
550  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
551  return false;
552  }
553  }
554  return true;
555  }
556  bool Serialize(TFile* fp) const {
557  int32_t used = GenericVector<T*>::size_used_;
558  if (fp->FWrite(&used, sizeof(used), 1) != 1) {
559  return false;
560  }
561  for (int i = 0; i < used; ++i) {
562  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
563  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
564  return false;
565  }
566  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
567  return false;
568  }
569  }
570  return true;
571  }
572  // Reads a vector of (pointers to) classes to the given file. Assumes the
573  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
574  // case of error. There is no Serialize for simple types, as you would have a
575  // normal GenericVector of those.
576  // If swap is true, assumes a big/little-endian swap is needed.
577  // Also needs T::T(), as new T is used in this function.
578  // Returns false in case of error.
579  bool DeSerialize(bool swap, FILE* fp) {
580  uint32_t reserved;
581  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
582  return false;
583  }
584  if (swap) {
585  Reverse32(&reserved);
586  }
587  // Arbitrarily limit the number of elements to protect against bad data.
588  assert(reserved <= UINT16_MAX);
589  if (reserved > UINT16_MAX) {
590  return false;
591  }
592  GenericVector<T*>::reserve(reserved);
593  truncate(0);
594  for (uint32_t i = 0; i < reserved; ++i) {
595  int8_t non_null;
596  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) {
597  return false;
598  }
599  T* item = nullptr;
600  if (non_null != 0) {
601  item = new T;
602  if (!item->DeSerialize(swap, fp)) {
603  delete item;
604  return false;
605  }
606  this->push_back(item);
607  } else {
608  // Null elements should keep their place in the vector.
609  this->push_back(nullptr);
610  }
611  }
612  return true;
613  }
614  bool DeSerialize(TFile* fp) {
615  int32_t reserved;
616  if (!DeSerializeSize(fp, &reserved)) {
617  return false;
618  }
619  GenericVector<T*>::reserve(reserved);
620  truncate(0);
621  for (int i = 0; i < reserved; ++i) {
622  if (!DeSerializeElement(fp)) {
623  return false;
624  }
625  }
626  return true;
627  }
628  // Enables deserialization of a selection of elements. Note that in order to
629  // retain the integrity of the stream, the caller must call some combination
630  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
631  // *size, assuming a true return.
632  static bool DeSerializeSize(TFile* fp, int32_t* size) {
633  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
634  }
635  // Reads and appends to the vector the next element of the serialization.
637  int8_t non_null;
638  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
639  return false;
640  }
641  T* item = nullptr;
642  if (non_null != 0) {
643  item = new T;
644  if (!item->DeSerialize(fp)) {
645  delete item;
646  return false;
647  }
648  this->push_back(item);
649  } else {
650  // Null elements should keep their place in the vector.
651  this->push_back(nullptr);
652  }
653  return true;
654  }
655  // Skips the next element of the serialization.
656  static bool DeSerializeSkip(TFile* fp) {
657  int8_t non_null;
658  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
659  return false;
660  }
661  if (non_null != 0) {
662  if (!T::SkipDeSerialize(fp)) {
663  return false;
664  }
665  }
666  return true;
667  }
668 
669  // Sorts the items pointed to by the members of this vector using
670  // t::operator<().
671  void sort() {
672  this->GenericVector<T*>::sort(&sort_ptr_cmp<T>);
673  }
674 };
675 
676 } // namespace tesseract
677 
678 // A useful vector that uses operator== to do comparisons.
679 template <typename T>
680 class GenericVectorEqEq : public GenericVector<T> {
681  public:
684  NewPermanentTessCallback(tesseract::cmp_eq<T>));
685  }
686  explicit GenericVectorEqEq(int size) : GenericVector<T>(size) {
688  NewPermanentTessCallback(tesseract::cmp_eq<T>));
689  }
690 };
691 
692 template <typename T>
693 void GenericVector<T>::init(int size) {
694  size_used_ = 0;
695  if (size <= 0) {
696  data_ = nullptr;
697  size_reserved_ = 0;
698  } else {
699  if (size < kDefaultVectorSize) {
700  size = kDefaultVectorSize;
701  }
702  data_ = new T[size];
703  size_reserved_ = size;
704  }
705  clear_cb_ = nullptr;
706  compare_cb_ = nullptr;
707 }
708 
709 template <typename T>
711  clear();
712 }
713 
714 // Reserve some memory. If the internal array contains elements, they are
715 // copied.
716 template <typename T>
718  if (size_reserved_ >= size || size <= 0) {
719  return;
720  }
721  if (size < kDefaultVectorSize) {
722  size = kDefaultVectorSize;
723  }
724  T* new_array = new T[size];
725  for (int i = 0; i < size_used_; ++i) {
726  new_array[i] = data_[i];
727  }
728  delete[] data_;
729  data_ = new_array;
730  size_reserved_ = size;
731 }
732 
733 template <typename T>
735  if (size_reserved_ == 0) {
736  reserve(kDefaultVectorSize);
737  } else {
738  reserve(2 * size_reserved_);
739  }
740 }
741 
742 // Resizes to size and sets all values to t.
743 template <typename T>
744 void GenericVector<T>::init_to_size(int size, const T& t) {
745  reserve(size);
746  size_used_ = size;
747  for (int i = 0; i < size; ++i) {
748  data_[i] = t;
749  }
750 }
751 
752 // Return the object from an index.
753 template <typename T>
754 T& GenericVector<T>::get(int index) const {
755  assert(index >= 0 && index < size_used_);
756  return data_[index];
757 }
758 
759 template <typename T>
760 T& GenericVector<T>::operator[](int index) const {
761  assert(index >= 0 && index < size_used_);
762  return data_[index];
763 }
764 
765 template <typename T>
767  assert(size_used_ > 0);
768  return data_[size_used_ - 1];
769 }
770 // Returns the last object and removes it.
771 template <typename T>
773  assert(size_used_ > 0);
774  return data_[--size_used_];
775 }
776 
777 // Return the object from an index.
778 template <typename T>
779 void GenericVector<T>::set(const T& t, int index) {
780  assert(index >= 0 && index < size_used_);
781  data_[index] = t;
782 }
783 
784 // Shifts the rest of the elements to the right to make
785 // space for the new elements and inserts the given element
786 // at the specified index.
787 template <typename T>
788 void GenericVector<T>::insert(const T& t, int index) {
789  assert(index >= 0 && index <= size_used_);
790  if (size_reserved_ == size_used_) {
791  double_the_size();
792  }
793  for (int i = size_used_; i > index; --i) {
794  data_[i] = data_[i - 1];
795  }
796  data_[index] = t;
797  size_used_++;
798 }
799 
800 // Removes an element at the given index and
801 // shifts the remaining elements to the left.
802 template <typename T>
803 void GenericVector<T>::remove(int index) {
804  assert(index >= 0 && index < size_used_);
805  for (int i = index; i < size_used_ - 1; ++i) {
806  data_[i] = data_[i + 1];
807  }
808  size_used_--;
809 }
810 
811 // Return true if the index is valindex
812 template <typename T>
814  return index >= 0 && index < size_used_;
815 }
816 
817 // Return the index of the T object.
818 template <typename T>
819 int GenericVector<T>::get_index(const T& object) const {
820  for (int i = 0; i < size_used_; ++i) {
821  assert(compare_cb_ != nullptr);
822  if (compare_cb_->Run(object, data_[i])) {
823  return i;
824  }
825  }
826  return -1;
827 }
828 
829 // Return true if T is in the array
830 template <typename T>
831 bool GenericVector<T>::contains(const T& object) const {
832  return get_index(object) != -1;
833 }
834 
835 // Add an element in the array
836 template <typename T>
838  int index = 0;
839  if (size_used_ == size_reserved_) {
840  double_the_size();
841  }
842  index = size_used_++;
843  data_[index] = object;
844  return index;
845 }
846 
847 template <typename T>
848 int GenericVector<T>::push_back_new(const T& object) {
849  int index = get_index(object);
850  if (index >= 0) {
851  return index;
852  }
853  return push_back(object);
854 }
855 
856 // Add an element in the array (front)
857 template <typename T>
858 int GenericVector<T>::push_front(const T& object) {
859  if (size_used_ == size_reserved_) {
860  double_the_size();
861  }
862  for (int i = size_used_; i > 0; --i) {
863  data_[i] = data_[i - 1];
864  }
865  data_[0] = object;
866  ++size_used_;
867  return 0;
868 }
869 
870 template <typename T>
872  push_back(t);
873 }
874 
875 template <typename T>
877  this->reserve(size_used_ + other.size_used_);
878  for (int i = 0; i < other.size(); ++i) {
879  this->operator+=(other.data_[i]);
880  }
881  return *this;
882 }
883 
884 template <typename T>
886  if (&other != this) {
887  this->truncate(0);
888  this->operator+=(other);
889  }
890  return *this;
891 }
892 
893 // Clear the array, calling the callback function if any.
894 template <typename T>
896  if (size_reserved_ > 0 && clear_cb_ != nullptr) {
897  for (int i = 0; i < size_used_; ++i) {
898  clear_cb_->Run(data_[i]);
899  }
900  }
901  delete[] data_;
902  data_ = nullptr;
903  size_used_ = 0;
904  size_reserved_ = 0;
905  delete clear_cb_;
906  clear_cb_ = nullptr;
907  delete compare_cb_;
908  compare_cb_ = nullptr;
909 }
910 
911 template <typename T>
913  for (int i = 0; i < size_used_; ++i) {
914  delete data_[i];
915  }
916 }
917 
918 template <typename T>
920  FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const {
921  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
922  return false;
923  }
924  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
925  return false;
926  }
927  if (cb != nullptr) {
928  for (int i = 0; i < size_used_; ++i) {
929  if (!cb->Run(f, data_[i])) {
930  delete cb;
931  return false;
932  }
933  }
934  delete cb;
935  } else {
936  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
937  return false;
938  }
939  }
940  return true;
941 }
942 
943 template <typename T>
946  int32_t reserved;
947  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
948  return false;
949  }
950  reserve(reserved);
951  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
952  return false;
953  }
954  if (cb != nullptr) {
955  for (int i = 0; i < size_used_; ++i) {
956  if (!cb->Run(f, data_ + i)) {
957  delete cb;
958  return false;
959  }
960  }
961  delete cb;
962  } else {
963  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
964  return false;
965  }
966  }
967  return true;
968 }
969 
970 // Writes a vector of simple types to the given file. Assumes that bitwise
971 // read/write of T will work. Returns false in case of error.
972 template <typename T>
973 bool GenericVector<T>::Serialize(FILE* fp) const {
974  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
975  return false;
976  }
977  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
978  return false;
979  }
980  return true;
981 }
982 template <typename T>
984  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
985  return false;
986  }
987  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
988  return false;
989  }
990  return true;
991 }
992 
993 // Reads a vector of simple types from the given file. Assumes that bitwise
994 // read/write will work with ReverseN according to sizeof(T).
995 // Returns false in case of error.
996 // If swap is true, assumes a big/little-endian swap is needed.
997 template <typename T>
998 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
999  uint32_t reserved;
1000  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1001  return false;
1002  }
1003  if (swap) {
1004  Reverse32(&reserved);
1005  }
1006  // Arbitrarily limit the number of elements to protect against bad data.
1007  assert(reserved <= UINT16_MAX);
1008  if (reserved > UINT16_MAX) {
1009  return false;
1010  }
1011  reserve(reserved);
1012  size_used_ = reserved;
1013  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
1014  return false;
1015  }
1016  if (swap) {
1017  for (int i = 0; i < size_used_; ++i) {
1018  ReverseN(&data_[i], sizeof(data_[i]));
1019  }
1020  }
1021  return true;
1022 }
1023 template <typename T>
1025  uint32_t reserved;
1026  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1027  return false;
1028  }
1029  // Arbitrarily limit the number of elements to protect against bad data.
1030  const uint32_t limit = 50000000;
1031  assert(reserved <= limit);
1032  if (reserved > limit) {
1033  return false;
1034  }
1035  reserve(reserved);
1036  size_used_ = reserved;
1037  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
1038 }
1039 template <typename T>
1041  uint32_t reserved;
1042  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1043  return false;
1044  }
1045  return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
1046 }
1047 
1048 // Writes a vector of classes to the given file. Assumes the existence of
1049 // bool T::Serialize(FILE* fp) const that returns false in case of error.
1050 // Returns false in case of error.
1051 template <typename T>
1053  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
1054  return false;
1055  }
1056  for (int i = 0; i < size_used_; ++i) {
1057  if (!data_[i].Serialize(fp)) {
1058  return false;
1059  }
1060  }
1061  return true;
1062 }
1063 template <typename T>
1065  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
1066  return false;
1067  }
1068  for (int i = 0; i < size_used_; ++i) {
1069  if (!data_[i].Serialize(fp)) {
1070  return false;
1071  }
1072  }
1073  return true;
1074 }
1075 
1076 // Reads a vector of classes from the given file. Assumes the existence of
1077 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
1078 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1079 // this function. Returns false in case of error.
1080 // If swap is true, assumes a big/little-endian swap is needed.
1081 template <typename T>
1082 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1083  int32_t reserved;
1084  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1085  return false;
1086  }
1087  if (swap) {
1088  Reverse32(&reserved);
1089  }
1090  T empty;
1091  init_to_size(reserved, empty);
1092  for (int i = 0; i < reserved; ++i) {
1093  if (!data_[i].DeSerialize(swap, fp)) {
1094  return false;
1095  }
1096  }
1097  return true;
1098 }
1099 template <typename T>
1101  int32_t reserved;
1102  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1103  return false;
1104  }
1105  T empty;
1106  init_to_size(reserved, empty);
1107  for (int i = 0; i < reserved; ++i) {
1108  if (!data_[i].DeSerialize(fp)) {
1109  return false;
1110  }
1111  }
1112  return true;
1113 }
1114 template <typename T>
1116  int32_t reserved;
1117  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1118  return false;
1119  }
1120  for (int i = 0; i < reserved; ++i) {
1121  if (!T::SkipDeSerialize(fp)) {
1122  return false;
1123  }
1124  }
1125  return true;
1126 }
1127 
1128 // This method clear the current object, then, does a shallow copy of
1129 // its argument, and finally invalidates its argument.
1130 template <typename T>
1132  this->clear();
1133  this->data_ = from->data_;
1134  this->size_reserved_ = from->size_reserved_;
1135  this->size_used_ = from->size_used_;
1136  this->compare_cb_ = from->compare_cb_;
1137  this->clear_cb_ = from->clear_cb_;
1138  from->data_ = nullptr;
1139  from->clear_cb_ = nullptr;
1140  from->compare_cb_ = nullptr;
1141  from->size_used_ = 0;
1142  from->size_reserved_ = 0;
1143 }
1144 
1145 template <typename T>
1147  sort(&tesseract::sort_cmp<T>);
1148 }
1149 
1150 // Internal recursive version of choose_nth_item.
1151 // The algorithm used comes from "Algorithms" by Sedgewick:
1152 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1153 // The principle is to choose a random pivot, and move everything less than
1154 // the pivot to its left, and everything greater than the pivot to the end
1155 // of the array, then recurse on the part that contains the desired index, or
1156 // just return the answer if it is in the equal section in the middle.
1157 // The random pivot guarantees average linear time for the same reason that
1158 // n times vector::push_back takes linear time on average.
1159 // target_index, start and and end are all indices into the full array.
1160 // Seed is a seed for rand_r for thread safety purposes. Its value is
1161 // unimportant as the random numbers do not affect the result except
1162 // between equal answers.
1163 template <typename T>
1164 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1165  unsigned int* seed) {
1166  // Number of elements to process.
1167  int num_elements = end - start;
1168  // Trivial cases.
1169  if (num_elements <= 1) {
1170  return start;
1171  }
1172  if (num_elements == 2) {
1173  if (data_[start] < data_[start + 1]) {
1174  return target_index > start ? start + 1 : start;
1175  }
1176  return target_index > start ? start : start + 1;
1177  }
1178 // Place the pivot at start.
1179 #ifndef rand_r // _MSC_VER, ANDROID
1180  srand(*seed);
1181 # define rand_r(seed) rand()
1182 #endif // _MSC_VER
1183  int pivot = rand_r(seed) % num_elements + start;
1184  swap(pivot, start);
1185  // The invariant condition here is that items [start, next_lesser) are less
1186  // than the pivot (which is at index next_lesser) and items
1187  // [prev_greater, end) are greater than the pivot, with items
1188  // [next_lesser, prev_greater) being equal to the pivot.
1189  int next_lesser = start;
1190  int prev_greater = end;
1191  for (int next_sample = start + 1; next_sample < prev_greater;) {
1192  if (data_[next_sample] < data_[next_lesser]) {
1193  swap(next_lesser++, next_sample++);
1194  } else if (data_[next_sample] == data_[next_lesser]) {
1195  ++next_sample;
1196  } else {
1197  swap(--prev_greater, next_sample);
1198  }
1199  }
1200  // Now the invariant is set up, we recurse on just the section that contains
1201  // the desired index.
1202  if (target_index < next_lesser) {
1203  return choose_nth_item(target_index, start, next_lesser, seed);
1204  }
1205  if (target_index < prev_greater) {
1206  return next_lesser; // In equal bracket.
1207  }
1208  return choose_nth_item(target_index, prev_greater, end, seed);
1209 }
1210 
1211 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
void compact_sorted()
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
int choose_nth_item(int target_index)
bool empty() const
Definition: genericvector.h:91
bool WithinBounds(const T &rangemin, const T &rangemax) const
virtual R Run(A1)=0
T dot_product(const GenericVector< T > &other) const
GenericVector< T > & operator+=(const GenericVector &other)
void Reverse32(void *ptr)
Definition: helpers.h:202
int FReadEndian(void *buffer, size_t size, int count)
Definition: serialis.cpp:260
bool bool_binary_search(const T &target) const
void init_to_size(int size, const T &t)
void resize_no_init(int size)
Definition: genericvector.h:66
int32_t size_reserved_
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const &> *cb) const
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:381
int sort_ptr_cmp(const void *t1, const void *t2)
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:630
int binary_search(const T &target) const
void delete_data_pointers()
size_t unsigned_size() const
Definition: genericvector.h:76
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:60
void remove(int index)
int get_index(const T &object) const
T & get(int index) const
PointerVector< T > & operator=(const PointerVector &other)
bool Serialize(TFile *fp) const
bool SerializeClasses(FILE *fp) const
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void move(GenericVector< T > *from)
virtual R Run(A1, A2)=0
const char * string() const
Definition: strngs.cpp:194
int length() const
Definition: genericvector.h:86
T & operator[](int index) const
static T * double_the_size_memcpy(int current_size, T *data)
void set(const T &t, int index)
T & back() const
void double_the_size()
bool DeSerialize(bool swap, FILE *fp)
void truncate(int size)
void swap(int index1, int index2)
PointerVector(const PointerVector &other)
#define rand_r(seed)
void set_clear_callback(TessCallback1< T > *cb)
int push_back_new(const T &object)
void sort(int(*comparator)(const void *, const void *))
int push_front(const T &object)
bool DeSerialize(bool swap, FILE *fp)
bool DeSerializeClasses(bool swap, FILE *fp)
int32_t size_used_
int sort_cmp(const void *t1, const void *t2)
static bool SkipDeSerialize(tesseract::TFile *fp)
int size_reserved() const
Definition: genericvector.h:82
TessCallback1< T > * clear_cb_
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T *> *cb)
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
void init(int size)
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:185
bool DeSerialize(TFile *fp)
void reserve(int size)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
bool DeSerialize(FILE *fp, char *data, size_t n)
Definition: serialis.cpp:28
bool DeSerializeElement(TFile *fp)
void insert(const T &t, int index)
static bool DeSerializeSize(TFile *fp, int32_t *size)
Definition: strngs.h:45
PointerVector< T > & operator+=(const PointerVector &other)
static const int kDefaultVectorSize
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
T contains_index(int index) const
GenericVector(int size, const T &init_val)
Definition: genericvector.h:43
void compact(TessResultCallback1< bool, int > *delete_cb)
bool Serialize(FILE *fp) const
int push_back(T object)
bool cmp_eq(T const &t1, T const &t2)
int size() const
Definition: genericvector.h:72
void Swap(T *p1, T *p2)
Definition: helpers.h:95
TessResultCallback2< bool, T const &, T const & > * compare_cb_
void compact(TessResultCallback1< bool, const T *> *delete_cb)
bool Serialize(FILE *fp) const
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:271
bool contains(const T &object) const
int FWrite(const void *buffer, size_t size, int count)
Definition: serialis.cpp:319
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
GenericVectorEqEq(int size)
GenericVector< T > & operator=(const GenericVector &other)
static bool DeSerializeSkip(TFile *fp)