20 #ifndef TESSERACT_TRAINING_VALIDATOR_H_ 21 #define TESSERACT_TRAINING_VALIDATOR_H_ 82 const std::vector<char32>& src,
83 std::vector<std::vector<char32>>*
dest);
151 const std::vector<char32>& src,
152 std::vector<std::vector<char32>>*
dest);
155 std::vector<std::vector<char32>>*
dest);
160 const std::vector<char32>& utf32);
246 #endif // TESSERACT_TRAINING_VALIDATOR_H_
static ViramaScript MostFrequentViramaScript(const std::vector< char32 > &utf32)
void MultiCodePart(unsigned length)
static const char32 kMinIndicUnicode
static const char32 kInvalid
static const char32 kRightToLeftMark
Validator(ViramaScript script, bool report_errors)
static const char32 kKhmerVirama
std::vector< std::vector< char32 > > parts_
virtual CharClass UnicodeToCharClass(char32 ch) const =0
bool IsSubscriptScript() const
static bool IsVirama(char32 unicode)
static const char32 kZeroWidthSpace
static const char32 kMaxJavaneseUnicode
static bool IsZeroWidthMark(char32 ch)
std::pair< CharClass, char32 > IndicPair
static const char32 kMaxViramaScriptUnicode
void ComputeClassCodes(const std::vector< char32 > &text)
static const int kIndicCodePageSize
static const char32 kZeroWidthNonJoiner
bool UseMultiCode(unsigned length)
static bool IsVedicAccent(char32 unicode)
static const char32 kSinhalaVirama
static const char32 kZeroWidthJoiner
static const char32 kMaxSinhalaUnicode
static const char32 kLeftToRightMark
std::vector< char32 > output_
static const char32 kJavaneseVirama
static const char32 kMyanmarVirama
static std::unique_ptr< Validator > ScriptValidator(ViramaScript script, bool report_errors)
bool ValidateCleanAndSegmentInternal(GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
std::vector< IndicPair > codes_
void MoveResultsToDest(GraphemeNormMode g_mode, std::vector< std::vector< char32 >> *dest)
static bool ValidateCleanAndSegment(GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
virtual bool ConsumeGraphemeIfValid()=0