Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trying to add user words/patterns again: #2324

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/ccmain/tessedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,7 @@ bool Tesseract::init_tesseract_lang_data(
#endif // ndef DISABLED_LEGACY_ENGINE
if (mgr->IsComponentAvailable(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
ASSERT_HOST(
lstm_recognizer_->Load(lstm_use_matrix ? language : nullptr, mgr));
ASSERT_HOST(lstm_recognizer_->Load(this->params(), lstm_use_matrix ? language : nullptr, mgr));
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
Expand Down
48 changes: 48 additions & 0 deletions src/ccutil/params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,52 @@ void ParamUtils::ResetToDefaults(ParamsVectors* member_params) {
}
}

// Copies values from pattern_params into member_params: every int, bool,
// string and double parameter in member_params for which a parameter with the
// same name is found in the corresponding pattern_params vector is set to that
// parameter's current value. Parameters without a match are left untouched.
// Does nothing if either argument is null.
void ParamUtils::ResetFromParams(ParamsVectors* member_params,
                                 const ParamsVectors* pattern_params) {
  if (member_params == nullptr || pattern_params == nullptr) {
    return;
  }

  // FindParam takes two vector arguments; the pattern vector is supplied for
  // both of them in every lookup below.

  // Integer parameters.
  for (int idx = 0; idx < member_params->int_params.size(); ++idx) {
    IntParam& target = *member_params->int_params[idx];
    IntParam* source = ParamUtils::FindParam<IntParam>(
        target.name_str(), pattern_params->int_params,
        pattern_params->int_params);
    if (source != nullptr) {
      target.set_value(*source);
    }
  }

  // Boolean parameters.
  for (int idx = 0; idx < member_params->bool_params.size(); ++idx) {
    BoolParam& target = *member_params->bool_params[idx];
    BoolParam* source = ParamUtils::FindParam<BoolParam>(
        target.name_str(), pattern_params->bool_params,
        pattern_params->bool_params);
    if (source != nullptr) {
      target.set_value(*source);
    }
  }

  // String parameters.
  for (int idx = 0; idx < member_params->string_params.size(); ++idx) {
    StringParam& target = *member_params->string_params[idx];
    StringParam* source = ParamUtils::FindParam<StringParam>(
        target.name_str(), pattern_params->string_params,
        pattern_params->string_params);
    if (source != nullptr) {
      target.set_value(*source);
    }
  }

  // Double parameters.
  for (int idx = 0; idx < member_params->double_params.size(); ++idx) {
    DoubleParam& target = *member_params->double_params[idx];
    DoubleParam* source = ParamUtils::FindParam<DoubleParam>(
        target.name_str(), pattern_params->double_params,
        pattern_params->double_params);
    if (source != nullptr) {
      target.set_value(*source);
    }
  }
}

} // namespace tesseract
4 changes: 4 additions & 0 deletions src/ccutil/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ class ParamUtils {

// Resets all parameters back to default values;
static void ResetToDefaults(ParamsVectors* member_params);

// Sets every parameter in member_params that has a same-named parameter in
// pattern_params to that parameter's value; others are left unchanged.
static void ResetFromParams(ParamsVectors* member_params,
const ParamsVectors* pattern_params);
};

// Definition of various parameter types.
Expand Down
3 changes: 3 additions & 0 deletions src/dict/dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,9 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
getUnicharset().size(), dawg_debug_level);
}

// TODO(bertsky): Now unused. Remove unless a reason emerges to keep separate
// loaders for Tesseract and LSTMRecognizer. (This is the same as Dict::Load,
// but without the user words/patterns and the document/pending words.)
// Loads the dawgs needed by the LSTM model. Call FinishLoad() after.
void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
// Load dawgs_.
Expand Down
10 changes: 6 additions & 4 deletions src/lstm/lstmrecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ LSTMRecognizer::~LSTMRecognizer() {
}

// Loads a model from mgr, including the dictionary only if lang is not null.
bool LSTMRecognizer::Load(const char* lang, TessdataManager* mgr) {
bool LSTMRecognizer::Load(const ParamsVectors* params, const char* lang, TessdataManager* mgr) {
TFile fp;
if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
if (!DeSerialize(mgr, &fp)) return false;
if (lang == nullptr) return true;
// Allow it to run without a dictionary.
LoadDictionary(lang, mgr);
LoadDictionary(params, lang, mgr);
return true;
}

Expand Down Expand Up @@ -154,11 +154,13 @@ bool LSTMRecognizer::LoadRecoder(TFile* fp) {
// on the unicharset matching. This enables training to deserialize a model
// from checkpoint or restore without having to go back and reload the
// dictionary.
bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) {
bool LSTMRecognizer::LoadDictionary(const ParamsVectors* params, const char* lang, TessdataManager* mgr) {
delete dict_;
dict_ = new Dict(&ccutil_);
// get the variables configured at Tesseract instance via config/API:
ParamUtils::ResetFromParams(ccutil_.params(), params);
dict_->SetupForLoad(Dict::GlobalDawgCache());
dict_->LoadLSTM(lang, mgr);
dict_->Load(lang, mgr);
if (dict_->FinishLoad()) return true; // Success.
tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
lang);
Expand Down
5 changes: 3 additions & 2 deletions src/lstm/lstmrecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "matrix.h"
#include "network.h"
#include "networkscratch.h"
#include "params.h"
#include "recodebeam.h"
#include "series.h"
#include "strngs.h"
Expand Down Expand Up @@ -154,7 +155,7 @@ class LSTMRecognizer {
int null_char() const { return null_char_; }

// Loads a model from mgr, including the dictionary only if lang is not null.
bool Load(const char* lang, TessdataManager* mgr);
bool Load(const ParamsVectors* params, const char* lang, TessdataManager* mgr);

// Writes to the given file. Returns false in case of error.
// If mgr contains a unicharset and recoder, then they are not encoded to fp.
Expand All @@ -174,7 +175,7 @@ class LSTMRecognizer {
// on the unicharset matching. This enables training to deserialize a model
// from checkpoint or restore without having to go back and reload the
// dictionary.
bool LoadDictionary(const char* lang, TessdataManager* mgr);
bool LoadDictionary(const ParamsVectors* params, const char* lang, TessdataManager* mgr);

// Recognizes the line image, contained within image_data, returning the
// recognized tesseract WERD_RES for the words.
Expand Down