tesseract  4.1.1
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()=default
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word = nullptr
 
tesseract::BoxWord * bln_boxes = nullptr
 
ROW * blob_row = nullptr
 
DENORM denorm
 
const UNICHARSET * uch_set = nullptr
 
TWERD * chopped_word = nullptr
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmented_timesteps
 
std::vector< std::vector< std::pair< const char *, float > > > CTC_symbol_choices
 
bool leading_space = false
 
int end = 0
 
MATRIX * ratings = nullptr
 
WERD_CHOICE * best_choice = nullptr
 
WERD_CHOICE * raw_choice = nullptr
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle = nullptr
 
TWERD * rebuild_word = nullptr
 
tesseract::BoxWord * box_word = nullptr
 
tesseract::Tesseract * tesseract = nullptr
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
WERD_CHOICE * ep_choice = nullptr
 
REJMAP reject_map
 
bool tess_failed = false
 
bool tess_accepted = false
 
bool tess_would_adapt = false
 
bool done = false
 
bool small_caps = false
 
bool odd_size = false
 
const FontInfo * fontinfo = nullptr
 
const FontInfo * fontinfo2 = nullptr
 
int8_t fontinfo_id_count = 0
 
int8_t fontinfo_id2_count = 0
 
bool guessed_x_ht = true
 
bool guessed_caps_ht = true
 
CRUNCH_MODE unlv_crunch_mode = CR_NONE
 
float x_height = 0.0f
 
float caps_height = 0.0f
 
float baseline_shift = 0.0f
 
float space_certainty = 0.0f
 
bool combination = false
 
bool part_of_combo = false
 
bool reject_spaces = false
 

Detailed Description

Definition at line 166 of file pageres.h.

Constructor & Destructor Documentation

◆ WERD_RES() [1/3]

WERD_RES::WERD_RES ( )
default

◆ WERD_RES() [2/3]

WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 345 of file pageres.h.

345  {
346  word = the_word;
347  }
WERD * word
Definition: pageres.h:186

◆ WERD_RES() [3/3]

WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 350 of file pageres.h.

350  : ELIST_LINK(source) {
351  // combination is used in function Clear which is called from operator=.
352  combination = false;
353  *this = source; // see operator=
354  }
ELIST_LINK()
Definition: elst.h:85
bool combination
Definition: pageres.h:339

◆ ~WERD_RES()

WERD_RES::~WERD_RES ( )

Definition at line 1090 of file pageres.cpp.

1090  {
1091  Clear();
1092 }
void Clear()
Definition: pageres.cpp:1094

Member Function Documentation

◆ AlternativeChoiceAdjustmentsWorseThan()

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 439 of file pageres.cpp.

439  {
440  // The choices are not changed by this iteration.
441  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
442  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
443  WERD_CHOICE* choice = wc_it.data();
444  if (choice->adjust_factor() <= threshold)
445  return false;
446  }
447  return true;
448 }
float adjust_factor() const
Definition: ratngs.h:296
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ AnyLtrCharsInWord()

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 409 of file pageres.h.

409  {
410  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
411  return false;
412  for (int id = 0; id < best_choice->length(); id++) {
413  int unichar_id = best_choice->unichar_id(id);
414  if (unichar_id < 0 || unichar_id >= uch_set->size())
415  continue; // Ignore illegal chars.
416  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
417  if (dir == UNICHARSET::U_LEFT_TO_RIGHT ||
418  dir == UNICHARSET::U_ARABIC_NUMBER)
419  return true;
420  }
421  return false;
422  }
int size() const
Definition: unicharset.h:341
int length() const
Definition: ratngs.h:293
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:203
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ AnyRtlCharsInWord()

bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 393 of file pageres.h.

393  {
394  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
395  return false;
396  for (int id = 0; id < best_choice->length(); id++) {
397  int unichar_id = best_choice->unichar_id(id);
398  if (unichar_id < 0 || unichar_id >= uch_set->size())
399  continue; // Ignore illegal chars.
400  UNICHARSET::Direction dir =
401  uch_set->get_direction(unichar_id);
402  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
403  dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC)
404  return true;
405  }
406  return false;
407  }
int size() const
Definition: unicharset.h:341
int length() const
Definition: ratngs.h:293
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:203
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ BestChoiceToCorrectText()

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 923 of file pageres.cpp.

923  {
925  ASSERT_HOST(best_choice != nullptr);
926  for (int i = 0; i < best_choice->length(); ++i) {
927  UNICHAR_ID choice_id = best_choice->unichar_id(i);
928  const char* blob_choice = uch_set->id_to_unichar(choice_id);
929  correct_text.push_back(STRING(blob_choice));
930  }
931 }
int UNICHAR_ID
Definition: unichar.h:34
int length() const
Definition: ratngs.h:293
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
Definition: strngs.h:45
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
int push_back(T object)
const UNICHARSET * uch_set
Definition: pageres.h:203
WERD_CHOICE * best_choice
Definition: pageres.h:241
#define ASSERT_HOST(x)
Definition: errcode.h:88
GenericVector< STRING > correct_text
Definition: pageres.h:289

◆ BestUTF8()

const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 363 of file pageres.h.

363  {
364  if (blob_index < 0 || best_choice == nullptr ||
365  blob_index >= best_choice->length())
366  return nullptr;
367  UNICHAR_ID id = best_choice->unichar_id(blob_index);
368  if (id < 0 || id >= uch_set->size())
369  return nullptr;
370  UNICHAR_ID mirrored = uch_set->get_mirror(id);
371  if (in_rtl_context && mirrored > 0)
372  id = mirrored;
373  return uch_set->id_to_unichar_ext(id);
374  }
int UNICHAR_ID
Definition: unichar.h:34
int size() const
Definition: unicharset.h:341
int length() const
Definition: ratngs.h:293
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:299
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
const UNICHARSET * uch_set
Definition: pageres.h:203
WERD_CHOICE * best_choice
Definition: pageres.h:241
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:697

◆ BothHyphens()

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1030 of file pageres.cpp.

1030  {
1031  const char *ch = uch_set->id_to_unichar(id1);
1032  const char *next_ch = uch_set->id_to_unichar(id2);
1033  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1034  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1035  return uch_set->unichar_to_id("-");
1036  return INVALID_UNICHAR_ID;
1037 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ BothQuotes()

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1008 of file pageres.cpp.

1008  {
1009  const char *ch = uch_set->id_to_unichar(id1);
1010  const char *next_ch = uch_set->id_to_unichar(id2);
1011  if (is_simple_quote(ch, strlen(ch)) &&
1012  is_simple_quote(next_ch, strlen(next_ch)))
1013  return uch_set->unichar_to_id("\"");
1014  return INVALID_UNICHAR_ID;
1015 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ BothSpaces()

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1059 of file pageres.cpp.

1059  {
1060  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1061  return id1;
1062  else
1063  return INVALID_UNICHAR_ID;
1064 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ Clear()

void WERD_RES::Clear ( )

Definition at line 1094 of file pageres.cpp.

1094  {
1095  if (combination) {
1096  delete word;
1097  }
1098  word = nullptr;
1099  delete blamer_bundle;
1100  blamer_bundle = nullptr;
1101  ClearResults();
1102 }
bool combination
Definition: pageres.h:339
BlamerBundle * blamer_bundle
Definition: pageres.h:252
WERD * word
Definition: pageres.h:186
void ClearResults()
Definition: pageres.cpp:1104

◆ ClearRatings()

void WERD_RES::ClearRatings ( )

Definition at line 1137 of file pageres.cpp.

1137  {
1138  if (ratings != nullptr) {
1139  ratings->delete_matrix_pointers();
1140  delete ratings;
1141  ratings = nullptr;
1142  }
1143 }
MATRIX * ratings
Definition: pageres.h:237
void delete_matrix_pointers()
Definition: matrix.h:458

◆ ClearResults()

void WERD_RES::ClearResults ( )

Definition at line 1104 of file pageres.cpp.

1104  {
1105  done = false;
1106  fontinfo = nullptr;
1107  fontinfo2 = nullptr;
1108  fontinfo_id_count = 0;
1109  fontinfo_id2_count = 0;
1110  delete bln_boxes;
1111  bln_boxes = nullptr;
1112  blob_row = nullptr;
1113  delete chopped_word;
1114  chopped_word = nullptr;
1115  delete rebuild_word;
1116  rebuild_word = nullptr;
1117  delete box_word;
1118  box_word = nullptr;
1119  best_state.clear();
1120  correct_text.clear();
1121  seam_array.delete_data_pointers();
1122  seam_array.clear();
1123  blob_widths.clear();
1124  blob_gaps.clear();
1125  ClearRatings();
1126  ClearWordChoices();
1127  if (blamer_bundle != nullptr) blamer_bundle->ClearResults();
1128 }
const FontInfo * fontinfo2
Definition: pageres.h:310
const FontInfo * fontinfo
Definition: pageres.h:309
ROW * blob_row
Definition: pageres.h:197
int8_t fontinfo_id2_count
Definition: pageres.h:312
void delete_data_pointers()
TWERD * rebuild_word
Definition: pageres.h:266
BlamerBundle * blamer_bundle
Definition: pageres.h:252
TWERD * chopped_word
Definition: pageres.h:212
tesseract::BoxWord * bln_boxes
Definition: pageres.h:195
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
GenericVector< int > blob_widths
Definition: pageres.h:216
GenericVector< int > best_state
Definition: pageres.h:285
void ClearRatings()
Definition: pageres.cpp:1137
int8_t fontinfo_id_count
Definition: pageres.h:311
void ClearWordChoices()
Definition: pageres.cpp:1129
GenericVector< int > blob_gaps
Definition: pageres.h:219
bool done
Definition: pageres.h:305
void ClearResults()
Definition: blamer.h:189
tesseract::BoxWord * box_word
Definition: pageres.h:272
GenericVector< STRING > correct_text
Definition: pageres.h:289

◆ ClearWordChoices()

void WERD_RES::ClearWordChoices ( )

Definition at line 1129 of file pageres.cpp.

1129  {
1130  best_choice = nullptr;
1131  delete raw_choice;
1132  raw_choice = nullptr;
1133  best_choices.clear();
1134  delete ep_choice;
1135  ep_choice = nullptr;
1136 }
WERD_CHOICE * raw_choice
Definition: pageres.h:246
WERD_CHOICE * ep_choice
Definition: pageres.h:293
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ CloneChoppedToRebuild()

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 835 of file pageres.cpp.

835  {
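836  delete rebuild_word;
837  rebuild_word = new TWERD(*chopped_word);
838  SetupBoxWord();
839  int word_len = box_word->length();
840  best_state.reserve(word_len);
841  correct_text.reserve(word_len);
842  for (int i = 0; i < word_len; ++i) {
843  best_state.push_back(1);
844  correct_text.push_back(STRING(""));
845  }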
846 }
Definition: blobs.h:418
TWERD * rebuild_word
Definition: pageres.h:266
void SetupBoxWord()
Definition: pageres.cpp:849
TWERD * chopped_word
Definition: pageres.h:212
GenericVector< int > best_state
Definition: pageres.h:285
void reserve(int size)
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
int push_back(T object)
tesseract::BoxWord * box_word
Definition: pageres.h:272
GenericVector< STRING > correct_text
Definition: pageres.h:289

◆ ComputeAdaptionThresholds()

void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 561 of file pageres.cpp.

565  {
566  int chunk = 0;
567  int end_chunk = best_choice->state(0);
568  int end_raw_chunk = raw_choice->state(0);
569  int raw_blob = 0;
570  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
571  float avg_rating = 0.0f;
572  int num_error_chunks = 0;
573 
574  // For each chunk in best choice blob i, count non-matching raw results.
575  while (chunk < end_chunk) {
576  if (chunk >= end_raw_chunk) {
577  ++raw_blob;
578  end_raw_chunk += raw_choice->state(raw_blob);
579  }
580  if (best_choice->unichar_id(i) !=
581  raw_choice->unichar_id(raw_blob)) {
582  avg_rating += raw_choice->certainty(raw_blob);
583  ++num_error_chunks;
584  }
585  ++chunk;
586  }
587 
588  if (num_error_chunks > 0) {
589  avg_rating /= num_error_chunks;
590  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
591  } else {
592  *thresholds = max_rating;
593  }
594 
595  if (*thresholds > max_rating)
596  *thresholds = max_rating;
597  if (*thresholds < min_rating)
598  *thresholds = min_rating;
599  }
600 }
int length() const
Definition: ratngs.h:293
WERD_CHOICE * raw_choice
Definition: pageres.h:246
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
int state(int index) const
Definition: ratngs.h:309
float certainty() const
Definition: ratngs.h:320
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ ConditionalBlobMerge()

bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX &> *  box_cb 
)

Definition at line 938 of file pageres.cpp.

940  {
941  ASSERT_HOST(best_choice->length() == 0 || ratings != nullptr);
942  bool modified = false;
943  for (int i = 0; i + 1 < best_choice->length(); ++i) {
944  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
945  best_choice->unichar_id(i+1));
946  if (new_id != INVALID_UNICHAR_ID &&
947  (box_cb == nullptr || box_cb->Run(box_word->BlobBox(i),
948  box_word->BlobBox(i + 1)))) {
949  // Raw choice should not be fixed.
950  best_choice->set_unichar_id(new_id, i);
951  modified = true;
952  MergeAdjacentBlobs(i);
953  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
954  if (!coord.Valid(*ratings)) {
955  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
956  }
957  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
958  if (FindMatchingChoice(new_id, blob_choices) == nullptr) {
959  // Insert a fake result.
960  auto* blob_choice = new BLOB_CHOICE;
961  blob_choice->set_unichar_id(new_id);
962  BLOB_CHOICE_IT bc_it(blob_choices);
963  bc_it.add_before_then_move(blob_choice);
964  }
965  }
966  }
967  delete class_cb;
968  delete box_cb;
969  return modified;
970 }
int UNICHAR_ID
Definition: unichar.h:34
bool Valid(const MATRIX &m) const
Definition: matrix.h:618
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:306
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:141
int length() const
Definition: ratngs.h:293
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:349
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:974
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:759
virtual R Run(A1, A2)=0
MATRIX * ratings
Definition: pageres.h:237
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:184
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::BoxWord * box_word
Definition: pageres.h:272
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ ConsumeWordResults()

void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 765 of file pageres.cpp.

765  {
766  denorm = word->denorm;
767  blob_row = word->blob_row;
768  MovePointerData(&chopped_word, &word->chopped_word);
769  MovePointerData(&rebuild_word, &word->rebuild_word);
770  MovePointerData(&box_word, &word->box_word);
771  seam_array.delete_data_pointers();
772  seam_array = word->seam_array;
773  word->seam_array.clear();
774  best_state.move(&word->best_state);
775  correct_text.move(&word->correct_text);
776  blob_widths.move(&word->blob_widths);
777  blob_gaps.move(&word->blob_gaps);
778  if (ratings != nullptr) ratings->delete_matrix_pointers();
779  MovePointerData(&ratings, &word->ratings);
780  best_choice = word->best_choice;
781  MovePointerData(&raw_choice, &word->raw_choice);
782  best_choices.clear();
783  WERD_CHOICE_IT wc_it(&best_choices);
784  wc_it.add_list_after(&word->best_choices);
785  reject_map = word->reject_map;
786  if (word->blamer_bundle != nullptr) {
787  assert(blamer_bundle != nullptr);
788  blamer_bundle->CopyResults(*(word->blamer_bundle));
789  }
790  CopySimpleFields(*word);
791 }
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:251
ROW * blob_row
Definition: pageres.h:197
REJMAP reject_map
Definition: pageres.h:294
void delete_data_pointers()
TWERD * rebuild_word
Definition: pageres.h:266
WERD_CHOICE * raw_choice
Definition: pageres.h:246
BlamerBundle * blamer_bundle
Definition: pageres.h:252
void move(GenericVector< T > *from)
TWERD * chopped_word
Definition: pageres.h:212
MATRIX * ratings
Definition: pageres.h:237
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
GenericVector< int > blob_widths
Definition: pageres.h:216
DENORM denorm
Definition: pageres.h:201
GenericVector< int > best_state
Definition: pageres.h:285
void delete_matrix_pointers()
Definition: matrix.h:458
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:210
GenericVector< int > blob_gaps
Definition: pageres.h:219
WERD * word
Definition: pageres.h:186
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::BoxWord * box_word
Definition: pageres.h:272
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
GenericVector< STRING > correct_text
Definition: pageres.h:289

◆ copy_on()

void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 660 of file pageres.h.

660  { //from this word
661  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
662  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
663  word->copy_on(word_res->word);
664  }
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
void copy_on(WERD *other)
Definition: werd.cpp:221
end of line
Definition: werd.h:33
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
start of line
Definition: werd.h:32
WERD * word
Definition: pageres.h:186

◆ CopySimpleFields()

void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 251 of file pageres.cpp.

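251  {
252  tess_failed = source.tess_failed;
253  tess_accepted = source.tess_accepted;
254  tess_would_adapt = source.tess_would_adapt;
255  done = source.done;
256  unlv_crunch_mode = source.unlv_crunch_mode;
257  small_caps = source.small_caps;
258  odd_size = source.odd_size;
259  fontinfo = source.fontinfo;
260  fontinfo2 = source.fontinfo2;
261  fontinfo_id_count = source.fontinfo_id_count;
262  fontinfo_id2_count = source.fontinfo_id2_count;
263  x_height = source.x_height;
264  caps_height = source.caps_height;
265  baseline_shift = source.baseline_shift;
266  guessed_x_ht = source.guessed_x_ht;
267  guessed_caps_ht = source.guessed_caps_ht;
268  reject_spaces = source.reject_spaces;
269  uch_set = source.uch_set;
270  tesseract = source.tesseract;
271 }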
const FontInfo * fontinfo2
Definition: pageres.h:310
const FontInfo * fontinfo
Definition: pageres.h:309
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:315
float x_height
Definition: pageres.h:316
bool guessed_caps_ht
Definition: pageres.h:314
int8_t fontinfo_id2_count
Definition: pageres.h:312
float caps_height
Definition: pageres.h:317
bool odd_size
Definition: pageres.h:307
tesseract::Tesseract * tesseract
Definition: pageres.h:280
bool small_caps
Definition: pageres.h:306
bool tess_accepted
Definition: pageres.h:303
bool tess_would_adapt
Definition: pageres.h:304
bool guessed_x_ht
Definition: pageres.h:313
int8_t fontinfo_id_count
Definition: pageres.h:311
bool tess_failed
Definition: pageres.h:295
float baseline_shift
Definition: pageres.h:318
const UNICHARSET * uch_set
Definition: pageres.h:203
bool done
Definition: pageres.h:305
bool reject_spaces
Definition: pageres.h:341

◆ DebugTopChoice()

void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 499 of file pageres.cpp.

499  {
500  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
501  tess_accepted, tess_would_adapt, done);
502  if (best_choice == nullptr)
503  tprintf("<Null choice>\n");
504  else
505  best_choice->print(msg);
506 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void print() const
Definition: ratngs.h:570
bool tess_accepted
Definition: pageres.h:303
bool tess_would_adapt
Definition: pageres.h:304
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool done
Definition: pageres.h:305

◆ DebugWordChoices()

void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 480 of file pageres.cpp.

480  {
481  if (debug ||
482  (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
483  best_choice->unichar_string() == STRING(word_to_debug))) {
484  if (raw_choice != nullptr)
485  raw_choice->print("\nBest Raw Choice");
486 
487  WERD_CHOICE_IT it(&best_choices);
488  int index = 0;
489  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
490  WERD_CHOICE* choice = it.data();
491  STRING label;
492  label.add_str_int("\nCooked Choice #", index);
493  choice->print(label.string());
494  }
495  }
496 }
void print() const
Definition: ratngs.h:570
WERD_CHOICE * raw_choice
Definition: pageres.h:246
const char * string() const
Definition: strngs.cpp:194
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
const STRING & unichar_string() const
Definition: ratngs.h:531
Definition: strngs.h:45
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ deep_copy()

static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 649 of file pageres.h.

649  {
650  auto* result = new WERD_RES(*src);
651  // That didn't copy the ratings, but we want a copy if there is one to
652  // begin with.
653  if (src->ratings != nullptr)
654  result->ratings = src->ratings->DeepCopy();
655  return result;
656  }
WERD_RES()=default
MATRIX * DeepCopy() const
Definition: matrix.cpp:94
MATRIX * ratings
Definition: pageres.h:237

◆ FakeClassifyWord()

void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 877 of file pageres.cpp.

877  {
878  // Setup the WERD_RES.
879  ASSERT_HOST(box_word != nullptr);
880  ASSERT_HOST(blob_count == box_word->length());
882  ClearRatings();
883  ratings = new MATRIX(blob_count, 1);
884  for (int c = 0; c < blob_count; ++c) {
885  auto* choice_list = new BLOB_CHOICE_LIST;
886  BLOB_CHOICE_IT choice_it(choice_list);
887  choice_it.add_after_then_move(choices[c]);
888  ratings->put(c, c, choice_list);
889  }
891  reject_map.initialise(blob_count);
892  best_state.init_to_size(blob_count, 1);
893  done = true;
894 }
void init_to_size(int size, const T &t)
REJMAP reject_map
Definition: pageres.h:294
Definition: matrix.h:578
MATRIX * ratings
Definition: pageres.h:237
void initialise(int16_t length)
Definition: rejctmap.cpp:273
GenericVector< int > best_state
Definition: pageres.h:285
void ClearRatings()
Definition: pageres.cpp:1137
void ClearWordChoices()
Definition: pageres.cpp:1129
int length() const
Definition: boxword.h:83
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
bool done
Definition: pageres.h:305
tesseract::BoxWord * box_word
Definition: pageres.h:272
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ FakeWordFromRatings()

void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 898 of file pageres.cpp.

898  {
899  int num_blobs = ratings->dimension();
900  auto* word_choice = new WERD_CHOICE(uch_set, num_blobs);
901  word_choice->set_permuter(permuter);
902  for (int b = 0; b < num_blobs; ++b) {
903  UNICHAR_ID unichar_id = UNICHAR_SPACE;
904  float rating = INT32_MAX;
905  float certainty = -INT32_MAX;
906  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
907  if (choices != nullptr && !choices->empty()) {
908  BLOB_CHOICE_IT bc_it(choices);
909  BLOB_CHOICE* choice = bc_it.data();
910  unichar_id = choice->unichar_id();
911  rating = choice->rating();
912  certainty = choice->certainty();
913  }
914  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
915  certainty);
916  }
917  LogNewRawChoice(word_choice);
918  // Ownership of word_choice taken by word here.
919  LogNewCookedChoice(1, false, word_choice);
920 }
int UNICHAR_ID
Definition: unichar.h:34
T get(ICOORD pos) const
Definition: matrix.h:231
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
MATRIX * ratings
Definition: pageres.h:237
float rating() const
Definition: ratngs.h:80
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:604
int dimension() const
Definition: matrix.h:536
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:620
const UNICHARSET * uch_set
Definition: pageres.h:203
float certainty() const
Definition: ratngs.h:83

◆ FilterWordChoices()

void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 513 of file pageres.cpp.

513  {
514  if (best_choice == nullptr || best_choices.singleton())
515  return;
516 
517  if (debug_level >= 2)
518  best_choice->print("\nFiltering against best choice");
519  WERD_CHOICE_IT it(&best_choices);
520  int index = 0;
521  for (it.forward(); !it.at_first(); it.forward(), ++index) {
522  WERD_CHOICE* choice = it.data();
523  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
524  choice->adjust_factor());
525  // i, j index the blob choice in choice, best_choice.
526  // chunk is an index into the chopped_word blobs (AKA chunks).
527  // Since the two words may use different segmentations of the chunks, we
528  // iterate over the chunks to find out whether a comparable blob
529  // classification is much worse than the best result.
530  int i = 0, j = 0, chunk = 0;
531  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
532  // and best_chunk are the indices of the first chunk in the NEXT blob,
533  // i.e. we don't have to increment i, j while chunk < choice_chunk and
534  // best_chunk respectively.
535  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
536  while (i < choice->length() && j < best_choice->length()) {
537  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
538  choice->certainty(i) - best_choice->certainty(j) < threshold) {
539  if (debug_level >= 2) {
540  choice->print("WorstCertaintyDiffWorseThan");
541  tprintf(
542  "i %d j %d Choice->Blob[i].Certainty %.4g"
543  " WorstOtherChoiceCertainty %g Threshold %g\n",
544  i, j, choice->certainty(i), best_choice->certainty(j), threshold);
545  tprintf("Discarding bad choice #%d\n", index);
546  }
547  delete it.extract();
548  break;
549  }
550  ++chunk;
551  // If needed, advance choice_chunk to keep up with chunk.
552  while (choice_chunk < chunk && ++i < choice->length())
553  choice_chunk += choice->state(i);
554  // If needed, advance best_chunk to keep up with chunk.
555  while (best_chunk < chunk && ++j < best_choice->length())
556  best_chunk += best_choice->state(j);
557  }
558  }
559 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
float adjust_factor() const
Definition: ratngs.h:296
void print() const
Definition: ratngs.h:570
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
int state(int index) const
Definition: ratngs.h:309
float certainty() const
Definition: ratngs.h:320
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ fix_hyphens()

void WERD_RES::fix_hyphens ( )

Definition at line 1047 of file pageres.cpp.

1047  {
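1048  if (!uch_set->contains_unichar("-") ||
1049  !uch_set->get_enabled(uch_set->unichar_to_id("-")))
1050  return; // Don't create it if it is disallowed.
1051 
1052  ConditionalBlobMerge(
1053  NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
1054  NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap));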
1055 }
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1041
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1030
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:938
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ fix_quotes()

void WERD_RES::fix_quotes ( )

Definition at line 1018 of file pageres.cpp.

1018  {
1019  if (!uch_set->contains_unichar("\"") ||
1021  return; // Don't create it if it is disallowed.
1022 
1025  nullptr);
1026 }
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1008
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:938
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ GetBlobChoice()

BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 750 of file pageres.cpp.

750  {
751  if (index < 0 || index >= best_choice->length()) return nullptr;
752  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
753  return FindMatchingChoice(best_choice->unichar_id(index), choices);
754 }
int length() const
Definition: ratngs.h:293
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:759
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:184
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ GetBlobChoices()

BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 759 of file pageres.cpp.

759  {
760  return best_choice->blob_choices(index, ratings);
761 }
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:294
MATRIX * ratings
Definition: pageres.h:237
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ GetBlobsGap()

int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 740 of file pageres.cpp.

740  {
741  if (blob_index < 0 || blob_index >= blob_gaps.size())
742  return 0;
743  return blob_gaps[blob_index];
744 }
GenericVector< int > blob_gaps
Definition: pageres.h:219
int size() const
Definition: genericvector.h:72

◆ GetBlobsWidth()

int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 730 of file pageres.cpp.

730  {
731  int result = 0;
732  for (int b = start_blob; b <= last_blob; ++b) {
733  result += blob_widths[b];
734  if (b < last_blob)
735  result += blob_gaps[b];
736  }
737  return result;
738 }
GenericVector< int > blob_widths
Definition: pageres.h:216
GenericVector< int > blob_gaps
Definition: pageres.h:219

◆ HyphenBoxesOverlap()

bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1041 of file pageres.cpp.

1041  {
1042  return box1.right() >= box2.left();
1043 }
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ InitForRetryRecognition()

void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 277 of file pageres.cpp.

277  {
278  word = source.word;
279  CopySimpleFields(source);
280  if (source.blamer_bundle != nullptr) {
281  blamer_bundle = new BlamerBundle();
283  }
284 }
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:251
BlamerBundle * blamer_bundle
Definition: pageres.h:252
WERD * word
Definition: pageres.h:186
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:203

◆ InsertSeam()

void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 418 of file pageres.cpp.

418  {
419  // Insert the seam into the SEAMS array.
420  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
421  seam_array.insert(seam, blob_number);
422  if (ratings != nullptr) {
423  // Expand the ratings matrix.
424  ratings = ratings->ConsumeAndMakeBigger(blob_number);
425  // Fix all the segmentation states.
426  if (raw_choice != nullptr)
427  raw_choice->UpdateStateForSplit(blob_number);
428  WERD_CHOICE_IT wc_it(&best_choices);
429  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
430  WERD_CHOICE* choice = wc_it.data();
431  choice->UpdateStateForSplit(blob_number);
432  }
434  }
435 }
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
WERD_CHOICE * raw_choice
Definition: pageres.h:246
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:400
TWERD * chopped_word
Definition: pageres.h:212
MATRIX * ratings
Definition: pageres.h:237
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:703
void insert(const T &t, int index)
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:76
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ IsAmbiguous()

bool WERD_RES::IsAmbiguous ( )

Definition at line 452 of file pageres.cpp.

452  {
453  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
454 }
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
bool dangerous_ambig_found() const
Definition: ratngs.h:353

◆ LogNewCookedChoice()

bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 620 of file pageres.cpp.

621  {
622  if (best_choice != nullptr) {
623  // Throw out obviously bad choices to save some work.
624  // TODO(rays) Get rid of this! This piece of code produces different
625  // results according to the order in which words are found, which is an
626  // undesirable behavior. It would be better to keep all the choices and
627  // prune them later when more information is available.
628  float max_certainty_delta =
629  StopperAmbigThreshold(best_choice->adjust_factor(),
630  word_choice->adjust_factor());
631  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
632  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
633  if (word_choice->certainty() - best_choice->certainty() <
634  max_certainty_delta) {
635  if (debug) {
636  STRING bad_string;
637  word_choice->string_and_lengths(&bad_string, nullptr);
638  tprintf("Discarding choice \"%s\" with an overly low certainty"
639  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
640  bad_string.string(), word_choice->certainty(),
642  max_certainty_delta + best_choice->certainty());
643  }
644  delete word_choice;
645  return false;
646  }
647  }
648 
649  // Insert in the list in order of increasing rating, but knock out worse
650  // string duplicates.
651  WERD_CHOICE_IT it(&best_choices);
652  const STRING& new_str = word_choice->unichar_string();
653  bool inserted = false;
654  int num_choices = 0;
655  if (!it.empty()) {
656  do {
657  WERD_CHOICE* choice = it.data();
658  if (choice->rating() > word_choice->rating() && !inserted) {
659  // Time to insert.
660  it.add_before_stay_put(word_choice);
661  inserted = true;
662  if (num_choices == 0)
663  best_choice = word_choice; // This is the new best.
664  ++num_choices;
665  }
666  if (choice->unichar_string() == new_str) {
667  if (inserted) {
668  // New is better.
669  delete it.extract();
670  } else {
671  // Old is better.
672  if (debug) {
673  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
674  new_str.string(), word_choice->rating(), choice->rating());
675  }
676  delete word_choice;
677  return false;
678  }
679  } else {
680  ++num_choices;
681  if (num_choices > max_num_choices)
682  delete it.extract();
683  }
684  it.forward();
685  } while (!it.at_first());
686  }
687  if (!inserted && num_choices < max_num_choices) {
688  it.add_to_end(word_choice);
689  inserted = true;
690  if (num_choices == 0)
691  best_choice = word_choice; // This is the new best.
692  }
693  if (debug) {
694  if (inserted)
695  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
696  else
697  tprintf("Poor");
698  word_choice->print(" Word Choice");
699  }
700  if (!inserted) {
701  delete word_choice;
702  return false;
703  }
704  return true;
705 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
float adjust_factor() const
Definition: ratngs.h:296
float rating() const
Definition: ratngs.h:317
void print() const
Definition: ratngs.h:570
const char * string() const
Definition: strngs.cpp:194
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:453
const STRING & unichar_string() const
Definition: ratngs.h:531
Definition: strngs.h:45
float certainty() const
Definition: ratngs.h:320
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ LogNewRawChoice()

bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 604 of file pageres.cpp.

604  {
605  if (raw_choice == nullptr || word_choice->rating() < raw_choice->rating()) {
606  delete raw_choice;
607  raw_choice = new WERD_CHOICE(*word_choice);
609  return true;
610  }
611  return false;
612 }
float rating() const
Definition: ratngs.h:317
WERD_CHOICE * raw_choice
Definition: pageres.h:246
void set_permuter(uint8_t perm)
Definition: ratngs.h:365

◆ merge_tess_fails()

void WERD_RES::merge_tess_fails ( )

Definition at line 1067 of file pageres.cpp.

1067  {
1069  NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) {
1070  int len = best_choice->length();
1071  ASSERT_HOST(reject_map.length() == len);
1072  ASSERT_HOST(box_word->length() == len);
1073  }
1074 }
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1059
int length() const
Definition: ratngs.h:293
REJMAP reject_map
Definition: pageres.h:294
int32_t length() const
Definition: rejctmap.h:223
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:938
int length() const
Definition: boxword.h:83
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::BoxWord * box_word
Definition: pageres.h:272
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ MergeAdjacentBlobs()

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 974 of file pageres.cpp.

974  {
975  if (reject_map.length() == best_choice->length())
976  reject_map.remove_pos(index);
977  best_choice->remove_unichar_id(index + 1);
978  rebuild_word->MergeBlobs(index, index + 2);
979  box_word->MergeBoxes(index, index + 2);
980  if (index + 1 < best_state.length()) {
981  best_state[index] += best_state[index + 1];
982  best_state.remove(index + 1);
983  }
984 }
int length() const
Definition: ratngs.h:293
void MergeBoxes(int start, int end)
Definition: boxword.cpp:131
REJMAP reject_map
Definition: pageres.h:294
TWERD * rebuild_word
Definition: pageres.h:266
void remove(int index)
int length() const
Definition: genericvector.h:86
void remove_unichar_id(int index)
Definition: ratngs.h:474
GenericVector< int > best_state
Definition: pageres.h:285
int32_t length() const
Definition: rejctmap.h:223
void MergeBlobs(int start, int end)
Definition: blobs.cpp:872
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:309
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::BoxWord * box_word
Definition: pageres.h:272

◆ operator=()

WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 188 of file pageres.cpp.

188  {
189  this->ELIST_LINK::operator=(source);
190  Clear();
191  if (source.combination) {
192  word = new WERD;
193  *word = *(source.word); // deep copy
194  } else {
195  word = source.word; // pt to same word
196  }
197  if (source.bln_boxes != nullptr)
198  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
199  if (source.chopped_word != nullptr)
200  chopped_word = new TWERD(*source.chopped_word);
201  if (source.rebuild_word != nullptr)
202  rebuild_word = new TWERD(*source.rebuild_word);
203  // TODO(rays) Do we ever need to copy the seam_array?
204  blob_row = source.blob_row;
205  denorm = source.denorm;
206  if (source.box_word != nullptr)
207  box_word = new tesseract::BoxWord(*source.box_word);
208  best_state = source.best_state;
209  correct_text = source.correct_text;
210  blob_widths = source.blob_widths;
211  blob_gaps = source.blob_gaps;
212  // None of the uses of operator= require the ratings matrix to be copied,
213  // so don't as it would be really slow.
214 
215  // Copy the cooked choices.
216  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
217  WERD_CHOICE_IT wc_dest_it(&best_choices);
218  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
219  const WERD_CHOICE *choice = wc_it.data();
220  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
221  }
222  if (!wc_dest_it.empty()) {
223  wc_dest_it.move_to_first();
224  best_choice = wc_dest_it.data();
225  } else {
226  best_choice = nullptr;
227  }
228 
229  if (source.raw_choice != nullptr) {
230  raw_choice = new WERD_CHOICE(*source.raw_choice);
231  } else {
232  raw_choice = nullptr;
233  }
234  if (source.ep_choice != nullptr) {
235  ep_choice = new WERD_CHOICE(*source.ep_choice);
236  } else {
237  ep_choice = nullptr;
238  }
239  reject_map = source.reject_map;
240  combination = source.combination;
241  part_of_combo = source.part_of_combo;
242  CopySimpleFields(source);
243  if (source.blamer_bundle != nullptr) {
244  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
245  }
246  return *this;
247 }
bool combination
Definition: pageres.h:339
Definition: blobs.h:418
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:251
ROW * blob_row
Definition: pageres.h:197
bool part_of_combo
Definition: pageres.h:340
void operator=(const ELIST_LINK &)
Definition: elst.h:94
REJMAP reject_map
Definition: pageres.h:294
TWERD * rebuild_word
Definition: pageres.h:266
WERD_CHOICE * raw_choice
Definition: pageres.h:246
BlamerBundle * blamer_bundle
Definition: pageres.h:252
TWERD * chopped_word
Definition: pageres.h:212
tesseract::BoxWord * bln_boxes
Definition: pageres.h:195
GenericVector< int > blob_widths
Definition: pageres.h:216
DENORM denorm
Definition: pageres.h:201
void Clear()
Definition: pageres.cpp:1094
GenericVector< int > best_state
Definition: pageres.h:285
WERD_CHOICE * ep_choice
Definition: pageres.h:293
GenericVector< int > blob_gaps
Definition: pageres.h:219
WERD * word
Definition: pageres.h:186
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::BoxWord * box_word
Definition: pageres.h:272
Definition: werd.h:56
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
GenericVector< STRING > correct_text
Definition: pageres.h:289

◆ PiecesAllNatural()

bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1078 of file pageres.cpp.

1078  {
1079  // all seams must have no splits.
1080  for (int index = start; index < start + count - 1; ++index) {
1081  if (index >= 0 && index < seam_array.size()) {
1082  SEAM* seam = seam_array[index];
1083  if (seam != nullptr && seam->HasAnySplits()) return false;
1084  }
1085  }
1086  return true;
1087 }
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
bool HasAnySplits() const
Definition: seam.h:61
int count(LIST var_list)
Definition: oldlist.cpp:95
Definition: seam.h:38
int size() const
Definition: genericvector.h:72

◆ PrintBestChoices()

void WERD_RES::PrintBestChoices ( ) const

Definition at line 717 of file pageres.cpp.

717  {
718  STRING alternates_str;
719  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
720  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
721  if (!it.at_first()) alternates_str += "\", \"";
722  alternates_str += it.data()->unichar_string();
723  }
724  tprintf("Alternates for \"%s\": {\"%s\"}\n",
725  best_choice->unichar_string().string(), alternates_str.string());
726 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const char * string() const
Definition: strngs.cpp:194
const STRING & unichar_string() const
Definition: ratngs.h:531
Definition: strngs.h:45
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ RawUTF8()

const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 376 of file pageres.h.

376  {
377  if (blob_index < 0 || blob_index >= raw_choice->length())
378  return nullptr;
379  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
380  if (id < 0 || id >= uch_set->size())
381  return nullptr;
382  return uch_set->id_to_unichar(id);
383  }
int UNICHAR_ID
Definition: unichar.h:34
int size() const
Definition: unicharset.h:341
int length() const
Definition: ratngs.h:293
WERD_CHOICE * raw_choice
Definition: pageres.h:246
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:203

◆ RebuildBestState()

void WERD_RES::RebuildBestState ( )

Definition at line 808 of file pageres.cpp.

808  {
809  ASSERT_HOST(best_choice != nullptr);
810  delete rebuild_word;
811  rebuild_word = new TWERD;
812  if (seam_array.empty())
814  best_state.truncate(0);
815  int start = 0;
816  for (int i = 0; i < best_choice->length(); ++i) {
817  int length = best_choice->state(i);
818  best_state.push_back(length);
819  if (length > 1) {
821  start + length - 1);
822  }
823  TBLOB* blob = chopped_word->blobs[start];
824  rebuild_word->blobs.push_back(new TBLOB(*blob));
825  if (length > 1) {
827  start + length - 1);
828  }
829  start += length;
830  }
831 }
bool empty() const
Definition: genericvector.h:91
Definition: blobs.h:418
int length() const
Definition: ratngs.h:293
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
Definition: blobs.h:284
TWERD * rebuild_word
Definition: pageres.h:266
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:263
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:210
void truncate(int size)
TWERD * chopped_word
Definition: pageres.h:212
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:285
int state(int index) const
Definition: ratngs.h:309
int push_back(T object)
WERD_CHOICE * best_choice
Definition: pageres.h:241
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:188
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ ReplaceBestChoice()

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 795 of file pageres.cpp.

795  {
796  best_choice = choice;
798  SetupBoxWord();
799  // Make up a fake reject map of the right length to keep the
800  // rejection pass happy.
804 }
REJMAP reject_map
Definition: pageres.h:294
void SetupBoxWord()
Definition: pageres.cpp:849
int length() const
Definition: genericvector.h:86
bool tess_accepted
Definition: pageres.h:303
void SetScriptPositions()
Definition: pageres.cpp:858
bool tess_would_adapt
Definition: pageres.h:304
void initialise(int16_t length)
Definition: rejctmap.cpp:273
void RebuildBestState()
Definition: pageres.cpp:808
GenericVector< int > best_state
Definition: pageres.h:285
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool done
Definition: pageres.h:305

◆ SetAllScriptPositions()

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 865 of file pageres.cpp.

865  {
867  WERD_CHOICE_IT wc_it(&best_choices);
868  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
869  wc_it.data()->SetAllScriptPositions(position);
870 }
WERD_CHOICE * raw_choice
Definition: pageres.h:246
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:627
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ SetScriptPositions()

void WERD_RES::SetScriptPositions ( )

Definition at line 858 of file pageres.cpp.

858  {
860 }
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
Definition: ratngs.cpp:554
bool small_caps
Definition: pageres.h:306
TWERD * chopped_word
Definition: pageres.h:212
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ SetupBasicsFromChoppedWord()

void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

Definition at line 343 of file pageres.cpp.

343  {
348 }
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:263
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:400
TWERD * chopped_word
Definition: pageres.h:212
tesseract::BoxWord * bln_boxes
Definition: pageres.h:195
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
void ClearWordChoices()
Definition: pageres.cpp:1129
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56

◆ SetupBlamerBundle()

void WERD_RES::SetupBlamerBundle ( )

Definition at line 393 of file pageres.cpp.

393  {
394  if (blamer_bundle != nullptr) {
396  }
397 }
BlamerBundle * blamer_bundle
Definition: pageres.h:252
DENORM denorm
Definition: pageres.h:201
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:153

◆ SetupBlobWidthsAndGaps()

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 400 of file pageres.cpp.

400  {
402  blob_gaps.truncate(0);
403  int num_blobs = chopped_word->NumBlobs();
404  for (int b = 0; b < num_blobs; ++b) {
405  TBLOB *blob = chopped_word->blobs[b];
406  TBOX box = blob->bounding_box();
407  blob_widths.push_back(box.width());
408  if (b + 1 < num_blobs) {
410  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
411  }
412  }
413 }
int16_t width() const
Definition: rect.h:115
int NumBlobs() const
Definition: blobs.h:448
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
Definition: blobs.h:284
void truncate(int size)
TWERD * chopped_word
Definition: pageres.h:212
GenericVector< int > blob_widths
Definition: pageres.h:216
GenericVector< int > blob_gaps
Definition: pageres.h:219
Definition: rect.h:34
int push_back(T object)
TBOX bounding_box() const
Definition: blobs.cpp:468
int16_t right() const
Definition: rect.h:79

◆ SetupBoxWord()

void WERD_RES::SetupBoxWord ( )

Definition at line 849 of file pageres.cpp.

849  {
850  delete box_word;
854 }
const BLOCK * block() const
Definition: normalis.h:273
TWERD * rebuild_word
Definition: pageres.h:266
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:92
DENORM denorm
Definition: pageres.h:201
WERD * word
Definition: pageres.h:186
void ComputeBoundingBoxes()
Definition: blobs.cpp:855
tesseract::BoxWord * box_word
Definition: pageres.h:272
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56

◆ SetupFake()

void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 352 of file pageres.cpp.

352  {
353  ClearResults();
354  SetupWordScript(unicharset_in);
355  chopped_word = new TWERD;
356  rebuild_word = new TWERD;
359  int blob_count = word->cblob_list()->length();
360  if (blob_count > 0) {
361  auto** fake_choices = new BLOB_CHOICE*[blob_count];
362  // For non-text blocks, just pass any blobs through to the box_word
363  // and call the word failed with a fake classification.
364  C_BLOB_IT b_it(word->cblob_list());
365  int blob_id = 0;
366  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
367  TBOX box = b_it.data()->bounding_box();
368  box_word->InsertBox(box_word->length(), box);
369  fake_choices[blob_id++] = new BLOB_CHOICE;
370  }
371  FakeClassifyWord(blob_count, fake_choices);
372  delete [] fake_choices;
373  } else {
374  auto* word = new WERD_CHOICE(&unicharset_in);
375  word->make_bad();
377  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
378  LogNewCookedChoice(1, false, word);
379  }
380  tess_failed = true;
381  done = true;
382 }
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
Definition: blobs.h:418
TWERD * rebuild_word
Definition: pageres.h:266
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
TWERD * chopped_word
Definition: pageres.h:212
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:877
tesseract::BoxWord * bln_boxes
Definition: pageres.h:195
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:384
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:604
Definition: rect.h:34
WERD * word
Definition: pageres.h:186
bool tess_failed
Definition: pageres.h:295
void ClearResults()
Definition: pageres.cpp:1104
int length() const
Definition: boxword.h:83
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:620
bool done
Definition: pageres.h:305
tesseract::BoxWord * box_word
Definition: pageres.h:272

◆ SetupForRecognition()

bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 302 of file pageres.cpp.

309  {
310  auto norm_mode_hint =
311  static_cast<tesseract::OcrEngineMode>(norm_mode);
312  tesseract = tess;
313  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
314  if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
315  word->cblob_list()->empty()) ||
316  (pb != nullptr && !pb->IsText())) {
317  // Empty words occur when all the blobs have been moved to the rej_blobs
318  // list, which seems to occur frequently in junk.
319  SetupFake(unicharset_in);
320  word->set_flag(W_REP_CHAR, false);
321  return false;
322  }
323  ClearResults();
324  SetupWordScript(unicharset_in);
325  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
326  float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f
327  ? row->body_size() : x_height;
328  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
329  word_xheight, baseline_shift, numeric_mode,
330  norm_mode_hint, norm_box, &denorm);
331  blob_row = row;
332  SetupBasicsFromChoppedWord(unicharset_in);
334  int num_blobs = chopped_word->NumBlobs();
335  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
336  tess_failed = false;
337  return true;
338 }
repeated character
Definition: werd.h:38
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
int NumBlobs() const
Definition: blobs.h:448
ROW * blob_row
Definition: pageres.h:197
bool IsText() const
Definition: polyblk.h:49
float x_height
Definition: pageres.h:316
Definition: matrix.h:578
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
TWERD * chopped_word
Definition: pageres.h:212
MATRIX * ratings
Definition: pageres.h:237
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:384
void SetupBlamerBundle()
Definition: pageres.cpp:393
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
DENORM denorm
Definition: pageres.h:201
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:776
WERD * word
Definition: pageres.h:186
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:790
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:53
bool tess_failed
Definition: pageres.h:295
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:352
void ClearResults()
Definition: pageres.cpp:1104
float baseline_shift
Definition: pageres.h:318
white on black
Definition: werd.h:41
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:343
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
float body_size() const
Definition: ocrrow.h:73

◆ SetupWordScript()

void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 384 of file pageres.cpp.

384  {
385  uch_set = &uch;
386  int script = uch.default_sid();
387  word->set_script_id(script);
388  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
389  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
390 }
int default_sid() const
Definition: unicharset.h:894
Special case latin for y. splitting.
Definition: werd.h:36
x-height concept makes sense.
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
WERD * word
Definition: pageres.h:186
const UNICHARSET * uch_set
Definition: pageres.h:203
void set_script_id(int id)
Definition: werd.h:104

◆ StatesAllValid()

bool WERD_RES::StatesAllValid ( )

Definition at line 458 of file pageres.cpp.

458  {
459  int ratings_dim = ratings->dimension();
460  if (raw_choice->TotalOfStates() != ratings_dim) {
461  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
462  raw_choice->TotalOfStates(), ratings_dim);
463  return false;
464  }
465  WERD_CHOICE_IT it(&best_choices);
466  int index = 0;
467  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
468  WERD_CHOICE* choice = it.data();
469  if (choice->TotalOfStates() != ratings_dim) {
470  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
471  index, choice->TotalOfStates(), ratings_dim);
472  return false;
473  }
474  }
475  return true;
476 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
WERD_CHOICE * raw_choice
Definition: pageres.h:246
MATRIX * ratings
Definition: pageres.h:237
int TotalOfStates() const
Definition: ratngs.cpp:715
int dimension() const
Definition: matrix.h:536
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249

◆ SymbolDirection()

UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 385 of file pageres.h.

385  {
386  if (best_choice == nullptr ||
387  blob_index >= best_choice->length() ||
388  blob_index < 0)
390  return uch_set->get_direction(best_choice->unichar_id(blob_index));
391  }
int length() const
Definition: ratngs.h:293
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:203
WERD_CHOICE * best_choice
Definition: pageres.h:241

◆ UnicharsInReadingOrder()

bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 427 of file pageres.h.

427  {
428  return best_choice->unichars_in_script_order();
429  }
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool unichars_in_script_order() const
Definition: ratngs.h:525

Member Data Documentation

◆ baseline_shift

float WERD_RES::baseline_shift = 0.0f

Definition at line 318 of file pageres.h.

◆ best_choice

WERD_CHOICE* WERD_RES::best_choice = nullptr

Definition at line 241 of file pageres.h.

◆ best_choices

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 249 of file pageres.h.

◆ best_state

GenericVector<int> WERD_RES::best_state

Definition at line 285 of file pageres.h.

◆ blamer_bundle

BlamerBundle* WERD_RES::blamer_bundle = nullptr

Definition at line 252 of file pageres.h.

◆ bln_boxes

tesseract::BoxWord* WERD_RES::bln_boxes = nullptr

Definition at line 195 of file pageres.h.

◆ blob_gaps

GenericVector<int> WERD_RES::blob_gaps

Definition at line 219 of file pageres.h.

◆ blob_row

ROW* WERD_RES::blob_row = nullptr

Definition at line 197 of file pageres.h.

◆ blob_widths

GenericVector<int> WERD_RES::blob_widths

Definition at line 216 of file pageres.h.

◆ box_word

tesseract::BoxWord* WERD_RES::box_word = nullptr

Definition at line 272 of file pageres.h.

◆ caps_height

float WERD_RES::caps_height = 0.0f

Definition at line 317 of file pageres.h.

◆ chopped_word

TWERD* WERD_RES::chopped_word = nullptr

Definition at line 212 of file pageres.h.

◆ combination

bool WERD_RES::combination = false

Definition at line 339 of file pageres.h.

◆ correct_text

GenericVector<STRING> WERD_RES::correct_text

Definition at line 289 of file pageres.h.

◆ CTC_symbol_choices

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::CTC_symbol_choices

Definition at line 226 of file pageres.h.

◆ denorm

DENORM WERD_RES::denorm

Definition at line 201 of file pageres.h.

◆ done

bool WERD_RES::done = false

Definition at line 305 of file pageres.h.

◆ end

int WERD_RES::end = 0

Definition at line 230 of file pageres.h.

◆ ep_choice

WERD_CHOICE* WERD_RES::ep_choice = nullptr

Definition at line 293 of file pageres.h.

◆ fontinfo

const FontInfo* WERD_RES::fontinfo = nullptr

Definition at line 309 of file pageres.h.

◆ fontinfo2

const FontInfo* WERD_RES::fontinfo2 = nullptr

Definition at line 310 of file pageres.h.

◆ fontinfo_id2_count

int8_t WERD_RES::fontinfo_id2_count = 0

Definition at line 312 of file pageres.h.

◆ fontinfo_id_count

int8_t WERD_RES::fontinfo_id_count = 0

Definition at line 311 of file pageres.h.

◆ guessed_caps_ht

bool WERD_RES::guessed_caps_ht = true

Definition at line 314 of file pageres.h.

◆ guessed_x_ht

bool WERD_RES::guessed_x_ht = true

Definition at line 313 of file pageres.h.

◆ leading_space

bool WERD_RES::leading_space = false

Definition at line 228 of file pageres.h.

◆ odd_size

bool WERD_RES::odd_size = false

Definition at line 307 of file pageres.h.

◆ part_of_combo

bool WERD_RES::part_of_combo = false

Definition at line 340 of file pageres.h.

◆ ratings

MATRIX* WERD_RES::ratings = nullptr

Definition at line 237 of file pageres.h.

◆ raw_choice

WERD_CHOICE* WERD_RES::raw_choice = nullptr

Definition at line 246 of file pageres.h.

◆ rebuild_word

TWERD* WERD_RES::rebuild_word = nullptr

Definition at line 266 of file pageres.h.

◆ reject_map

REJMAP WERD_RES::reject_map

Definition at line 294 of file pageres.h.

◆ reject_spaces

bool WERD_RES::reject_spaces = false

Definition at line 341 of file pageres.h.

◆ seam_array

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 214 of file pageres.h.

◆ segmented_timesteps

std::vector<std::vector<std::vector< std::pair<const char*, float> > > > WERD_RES::segmented_timesteps

Definition at line 224 of file pageres.h.

◆ small_caps

bool WERD_RES::small_caps = false

Definition at line 306 of file pageres.h.

◆ space_certainty

float WERD_RES::space_certainty = 0.0f

Definition at line 321 of file pageres.h.

◆ tess_accepted

bool WERD_RES::tess_accepted = false

Definition at line 303 of file pageres.h.

◆ tess_failed

bool WERD_RES::tess_failed = false

Definition at line 295 of file pageres.h.

◆ tess_would_adapt

bool WERD_RES::tess_would_adapt = false

Definition at line 304 of file pageres.h.

◆ tesseract

tesseract::Tesseract* WERD_RES::tesseract = nullptr

Definition at line 280 of file pageres.h.

◆ timesteps

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::timesteps

Definition at line 221 of file pageres.h.

◆ uch_set

const UNICHARSET* WERD_RES::uch_set = nullptr

Definition at line 203 of file pageres.h.

◆ unlv_crunch_mode

CRUNCH_MODE WERD_RES::unlv_crunch_mode = CR_NONE

Definition at line 315 of file pageres.h.

◆ word

WERD* WERD_RES::word = nullptr

Definition at line 186 of file pageres.h.

◆ x_height

float WERD_RES::x_height = 0.0f

Definition at line 316 of file pageres.h.


The documentation for this class was generated from the following files: