Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Renamed GetGlyphConfidences() to GetChoices() and glyph_confidences to lstm_choice_mode #1997

Merged
merged 1 commit into from
Oct 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1560,8 +1560,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {

// Now, process the word...
std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
if (tesseract_->glyph_confidences) {
confidencemap = res_it->GetGlyphConfidences();
if (tesseract_->lstm_choice_mode) {
confidencemap = res_it->GetChoices();
}
hocr_str += "\n <span class='ocrx_word'";
AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
Expand Down Expand Up @@ -1621,16 +1621,16 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str += "</em>";
if (bold) hocr_str += "</strong>";
// If glyph confidence is required it is added here
if (tesseract_->glyph_confidences == 1 && confidencemap != nullptr) {
// If the lstm choice mode is required it is added here
if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
for (size_t i = 0; i < confidencemap->size(); i++) {
hocr_str += "\n <span class='ocrx_cinfo'";
AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
hocr_str += ">";
std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
for (std::pair<const char*, float> conf : timestep) {
hocr_str += "<span class='ocr_glyph'";
AddIdTohOCR(&hocr_str, "glyph", page_id, wcnt, gcnt);
AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
hocr_str += "'";
hocr_str += ">";
Expand All @@ -1641,18 +1641,18 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
hocr_str += "</span>";
tcnt++;
}
} else if (tesseract_->glyph_confidences == 2 && confidencemap != nullptr) {
} else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
for (size_t i = 0; i < confidencemap->size(); i++) {
std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
if (timestep.size() > 0) {
hocr_str += "\n <span class='ocrx_cinfo'";
AddIdTohOCR(&hocr_str, "alternative_glyphs", page_id, wcnt, tcnt);
AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
hocr_str += " chosen='";
hocr_str += timestep[0].first;
hocr_str += "'>";
for (size_t j = 1; j < timestep.size(); j++) {
hocr_str += "<span class='ocr_glyph'";
AddIdTohOCR(&hocr_str, "glyph", page_id, wcnt, gcnt);
AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
hocr_str += "'";
hocr_str += ">";
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/linerec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
if (im_data == nullptr) return;
lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0,
kWorstDictCertainty / kCertaintyScale,
word_box, words, glyph_confidences);
word_box, words, lstm_choice_mode);
delete im_data;
SearchWords(words);
}
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
return result;
}

std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetGlyphConfidences() const {
std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetChoices() const {
if (it_->word() != nullptr) {
return &it_->word()->timesteps;
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/ccmain/resultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ class TESS_API ResultIterator : public LTRResultIterator {
virtual char* GetUTF8Text(PageIteratorLevel level) const;

/**
* Returns the glyph confidences for every LSTM timestep for the current Word
* Returns the lstm choices for every LSTM timestep for the current Word
*
* This diff renames GetGlyphConfidences() to GetChoices(); the removed and the
* added declaration are both listed below.
*
* The outer vector holds one entry per timestep. Each entry is a list of
* (text, value) pairs, where the text is a C string and the value a float.
*
* When the iterator has a current word, the implementation returns the
* address of that word's `timesteps` member, i.e. a pointer into the word
* result rather than a fresh copy.
* NOTE(review): the result for the case without a current word is not visible
* in this listing - check for nullptr before dereferencing and confirm.
*/
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetGlyphConfidences() const;
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetChoices() const;

/**
* Return whether the current paragraph's dominant reading direction
Expand Down
12 changes: 6 additions & 6 deletions src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,12 +514,12 @@ Tesseract::Tesseract()
STRING_MEMBER(page_separator, "\f",
"Page separator (default is form feed control character)",
this->params()),
INT_MEMBER(glyph_confidences, 0,
"Allows to include glyph confidences in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"With 1 the glyph confidences of all timesteps are included. "
"With 2 the glyph confidences are accumulated per charakter.",
this->params()),
INT_MEMBER(lstm_choice_mode, 0,
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character.",
this->params()),

backup_config_file_(nullptr),
pix_binary_(nullptr),
Expand Down
8 changes: 4 additions & 4 deletions src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -1118,11 +1118,11 @@ class Tesseract : public Wordrec {
"Preserve multiple interword spaces");
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");
INT_VAR_H(glyph_confidences, 0,
"Allows to include glyph confidences in the hOCR output. "
INT_VAR_H(lstm_choice_mode, 0,
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"With 1 the glyph confidences of all timesteps are included. "
"With 2 the glyph confidences are accumulated per charakter.");
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character.");

//// ambigsrecog.cpp /////////////////////////////////////////////////////////
FILE *init_recog_training(const STRING &fname);
Expand Down
2 changes: 1 addition & 1 deletion src/ccstruct/pageres.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class WERD_RES : public ELIST_LINK {
// Gaps between blobs in chopped_word. blob_gaps[i] is the gap between
// blob i and blob i+1.
GenericVector<int> blob_gaps;
// Stores the glyph confidences of every timestep of the lstm
// Stores the lstm choices of every timestep
std::vector<std::vector<std::pair<const char*, float>>> timesteps;
// Ratings matrix contains classifier choices for each classified combination
// of blobs. The dimension is the same as the number of blobs in chopped_word
Expand Down
7 changes: 3 additions & 4 deletions src/lstm/lstmrecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
bool debug, double worst_dict_cert,
const TBOX& line_box,
PointerVector<WERD_RES>* words,
int glyph_confidences) {
int lstm_choice_mode) {
NetworkIO outputs;
float scale_factor;
NetworkIO inputs;
Expand All @@ -185,10 +185,9 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_);
}
search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
&GetUnicharset(), glyph_confidences);
&GetUnicharset(), lstm_choice_mode);
search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
&GetUnicharset(), words,
glyph_confidences);
&GetUnicharset(), words, lstm_choice_mode);
}

// Helper computes min and mean best results in the output.
Expand Down
3 changes: 1 addition & 2 deletions src/lstm/lstmrecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ class LSTMRecognizer {
// will be used in a dictionary word.
void RecognizeLine(const ImageData& image_data, bool invert, bool debug,
double worst_dict_cert, const TBOX& line_box,
PointerVector<WERD_RES>* words,
int glyph_confidences = 0);
PointerVector<WERD_RES>* words, int lstm_choice_mode = 0);

// Helper computes min and mean best results in the output.
void OutputStats(const NetworkIO& outputs,
Expand Down
82 changes: 41 additions & 41 deletions src/lstm/recodebeam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,17 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder,
// Decodes the set of network outputs, storing the lattice internally.
//
// Resets beam_size_ and then walks the network output one timestep at a time
// (t = 0 .. output.Width() - 1), calling ComputeTopN followed by DecodeStep
// for each timestep.
// When the choice-mode argument is non-zero, the member `timesteps` is
// cleared before the loop and one entry per timestep is recorded through
// SaveMostCertainChoices (SaveMostCertainGlyphs before the rename).
//
// NOTE: this listing is a diff view of the rename glyph_confidence ->
// lstm_choice_mode, so the removed and the added spelling of every changed
// line appear one after the other.
void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
double cert_offset, double worst_dict_cert,
const UNICHARSET* charset, int glyph_confidence) {
const UNICHARSET* charset, int lstm_choice_mode) {
beam_size_ = 0;
int width = output.Width();
// Discard whatever an earlier call recorded; only done when choices are
// going to be collected again.
if (glyph_confidence)
if (lstm_choice_mode)
timesteps.clear();
for (int t = 0; t < width; ++t) {
ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
charset);
// Any non-zero mode triggers the recording; the distinction between the
// individual mode values is not made in this function.
if (glyph_confidence) {
SaveMostCertainGlyphs(output.f(t), output.NumFeatures(), charset, t);
if (lstm_choice_mode) {
SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
}
}
}
Expand All @@ -107,33 +107,33 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float>& output,
}
}

// Records the candidate symbols of a single timestep.
//   outputs      - network output values of the timestep, one per index.
//   num_outputs  - number of entries in outputs.
//   charset      - turns an index into its text via id_to_unichar_ext().
//   xCoord       - timestep index handed in by the caller; it is not read
//                  anywhere in this body.
// Every output whose value is at least 0.01 is stored as a (text, value)
// pair. The pairs are kept in descending order of value, and the finished
// list is appended to the member `timesteps`, so each call adds exactly one
// entry (possibly an empty one).
//
// NOTE: this listing is a diff view of the rename glyphs -> choices and
// charakter -> character, so the removed and the added spelling of every
// changed line appear one after the other.
void RecodeBeamSearch::SaveMostCertainGlyphs(const float* outputs,
void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs,
int num_outputs,
const UNICHARSET* charset,
int xCoord) {
std::vector<std::pair<const char*, float>> glyphs;
std::vector<std::pair<const char*, float>> choices;
int pos = 0;
for (int i = 0; i < num_outputs; ++i) {
// Outputs below 0.01 are ignored.
if (outputs[i] >= 0.01f) {
// Map the output index to its text: the last two indices yield an empty
// string, index 0 maps to unichar id 0 and any other index i maps to
// unichar id i + 2.
// NOTE(review): presumably the offset of 2 skips special entries of the
// charset and the trailing indices have no printable symbol - confirm.
// NOTE(review): only the pointer returned by id_to_unichar_ext() is stored;
// presumably it has to stay valid for as long as `timesteps` is read -
// confirm.
const char* charakter;
const char* character;
if (i + 2 >= num_outputs) {
charakter = "";
character = "";
} else if (i > 0) {
charakter = charset->id_to_unichar_ext(i + 2);
character = charset->id_to_unichar_ext(i + 2);
} else {
charakter = charset->id_to_unichar_ext(i);
character = charset->id_to_unichar_ext(i);
}
pos = 0;
//order the possible glyphs within one timestep
//order the possible choices within one timestep
//beginning with the most likely
// Linear scan for the first stored entry that is not strictly greater than
// the new value; an entry with an equal value therefore ends up behind the
// newly inserted one.
// NOTE(review): size() is unsigned while pos is an int, so this comparison
// mixes signedness.
while (glyphs.size() > pos && glyphs[pos].second > outputs[i]) {
while (choices.size() > pos && choices[pos].second > outputs[i]) {
pos++;
}
glyphs.insert(glyphs.begin() + pos,
std::pair<const char*, float>(charakter, outputs[i]));
choices.insert(choices.begin() + pos,
std::pair<const char*, float>(character, outputs[i]));
}
}
// Appended even when no output reached the threshold.
timesteps.push_back(glyphs);
timesteps.push_back(choices);
}

// Returns the best path as labels/scores/xcoords similar to simple CTC.
Expand Down Expand Up @@ -179,15 +179,15 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
float scale_factor, bool debug,
const UNICHARSET* unicharset,
PointerVector<WERD_RES>* words,
int glyph_confidence) {
int lstm_choice_mode) {
words->truncate(0);
GenericVector<int> unichar_ids;
GenericVector<float> certs;
GenericVector<float> ratings;
GenericVector<int> xcoords;
GenericVector<const RecodeNode*> best_nodes;
GenericVector<const RecodeNode*> second_nodes;
std::deque<std::pair<int,int>> best_glyphs;
std::deque<std::pair<int,int>> best_choices;
ExtractBestPaths(&best_nodes, &second_nodes);
if (debug) {
DebugPath(unicharset, best_nodes);
Expand All @@ -199,15 +199,15 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
}
int current_char;
int timestepEnd = 0;
//if glyph confidence is required in granularity level 2 it stores the x
//Coordinates of every chosen character to match the alternative glyphs to it
if (glyph_confidence == 2) {
//if lstm choice mode is required in granularity level 2 it stores the x
//Coordinates of every chosen character to match the alternative choices to it
if (lstm_choice_mode == 2) {
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
&xcoords, &best_glyphs);
if (best_glyphs.size() > 0) {
current_char = best_glyphs.front().first;
timestepEnd = best_glyphs.front().second;
best_glyphs.pop_front();
&xcoords, &best_choices);
if (best_choices.size() > 0) {
current_char = best_choices.front().first;
timestepEnd = best_choices.front().second;
best_choices.pop_front();
}
} else {
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
Expand Down Expand Up @@ -243,25 +243,25 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
WERD_RES* word_res = InitializeWord(
leading_space, line_box, word_start, word_end,
std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
if (glyph_confidence == 1) {
if (lstm_choice_mode == 1) {
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
word_res->timesteps.push_back(timesteps[i]);
}
timestepEnd = xcoords[word_end];
} else if (glyph_confidence == 2) {
} else if (lstm_choice_mode == 2) {
float sum = 0;
std::vector<std::pair<const char*, float>> glyph_pairs;
std::vector<std::pair<const char*, float>> choice_pairs;
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
for (std::pair<const char*, float> glyph : timesteps[i]) {
if (std::strcmp(glyph.first, "") != 0) {
sum += glyph.second;
glyph_pairs.push_back(glyph);
for (std::pair<const char*, float> choice : timesteps[i]) {
if (std::strcmp(choice.first, "") != 0) {
sum += choice.second;
choice_pairs.push_back(choice);
}
}
if (best_glyphs.size() > 0 && i == best_glyphs.front().second-1
if (best_choices.size() > 0 && i == best_choices.front().second - 1
|| i == xcoords[word_end]-1) {
std::map<const char*, float> summed_propabilities;
for(auto it = glyph_pairs.begin(); it != glyph_pairs.end(); ++it) {
for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
summed_propabilities[it->first] += it->second;
}
std::vector<std::pair<const char*, float>> accumulated_timestep;
Expand All @@ -282,11 +282,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
std::pair<const char*,float>(it->first,
it->second));
}
if (best_glyphs.size() > 0) {
current_char = best_glyphs.front().first;
best_glyphs.pop_front();
if (best_choices.size() > 0) {
current_char = best_choices.front().first;
best_choices.pop_front();
}
glyph_pairs.clear();
choice_pairs.clear();
word_res->timesteps.push_back(accumulated_timestep);
sum = 0;
}
Expand Down Expand Up @@ -366,7 +366,7 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
const GenericVector<const RecodeNode*>& best_nodes,
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
GenericVector<float>* ratings, GenericVector<int>* xcoords,
std::deque<std::pair<int,int>>* best_glyphs) {
std::deque<std::pair<int, int>>* best_choices) {
unichar_ids->truncate(0);
certs->truncate(0);
ratings->truncate(0);
Expand Down Expand Up @@ -395,8 +395,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
}
unichar_ids->push_back(unichar_id);
xcoords->push_back(t);
if(best_glyphs != nullptr) {
best_glyphs->push_back(std::pair<int,int>(unichar_id,t));
if (best_choices != nullptr) {
best_choices->push_back(std::pair<int, int>(unichar_id, t));
}
do {
double cert = best_nodes[t++]->certainty;
Expand Down
10 changes: 5 additions & 5 deletions src/lstm/recodebeam.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class RecodeBeamSearch {
// If charset is not null, it enables detailed debugging of the beam search.
void Decode(const NetworkIO& output, double dict_ratio, double cert_offset,
double worst_dict_cert, const UNICHARSET* charset,
int glyph_confidence = 0);
int lstm_choice_mode = 0);
void Decode(const GENERIC_2D_ARRAY<float>& output, double dict_ratio,
double cert_offset, double worst_dict_cert,
const UNICHARSET* charset);
Expand All @@ -206,7 +206,7 @@ class RecodeBeamSearch {
void ExtractBestPathAsWords(const TBOX& line_box, float scale_factor,
bool debug, const UNICHARSET* unicharset,
PointerVector<WERD_RES>* words,
int glyph_confidence = 0);
int lstm_choice_mode = 0);

// Generates debug output of the content of the beams after a Decode.
void DebugBeams(const UNICHARSET& unicharset) const;
Expand Down Expand Up @@ -282,7 +282,7 @@ class RecodeBeamSearch {
const GenericVector<const RecodeNode*>& best_nodes,
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
GenericVector<float>* ratings, GenericVector<int>* xcoords,
std::deque<std::pair<int,int>>* best_glyphs = nullptr);
std::deque<std::pair<int,int>>* best_choices = nullptr);

// Sets up a word with the ratings matrix and fake blobs with boxes in the
// right places.
Expand All @@ -303,8 +303,8 @@ class RecodeBeamSearch {
double cert_offset, double worst_dict_cert,
const UNICHARSET* charset, bool debug = false);

//Saves the most certain glyphs for the current time-step
void SaveMostCertainGlyphs(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord);
//Saves the most certain choices for the current time-step
void SaveMostCertainChoices(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord);

// Adds to the appropriate beams the legal (according to recoder)
// continuations of context prev, which is from the given index to beams_,
Expand Down