Replace remaining GenericVector with std::vector in src/dict

Signed-off-by: Stefan Weil <sw@weilnetz.de>
rvrsh3ll · Mar 16, 2021 · bf42f83 · bf42f83
1 parent 17eee86
commit bf42f83
Show file tree

Hide file tree

Showing 10 changed files with 67 additions and 63 deletions.
diff --git a/src/ccstruct/params_training_featdef.h b/src/ccstruct/params_training_featdef.h
@@ -19,6 +19,7 @@
 #ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
 #define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
 
+#include <cstring> // for memset
 #include <string>
 #include <vector>
 

diff --git a/src/dict/dawg.h b/src/dict/dawg.h
@@ -57,9 +57,9 @@ struct NodeChild {
   NodeChild() : unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
 };
 
-using NodeChildVector = GenericVector<NodeChild>;
-using SuccessorList = GenericVector<int>;
-using SuccessorListsVector = GenericVector<SuccessorList *>;
+using NodeChildVector = std::vector<NodeChild>;
+using SuccessorList = std::vector<int>;
+using SuccessorListsVector = std::vector<SuccessorList *>;
 
 enum DawgType {
   DAWG_TYPE_PUNCTUATION,
@@ -176,7 +176,7 @@ class TESS_API Dawg {
   /// Fills vec with unichar ids that represent the character classes
   /// of the given unichar_id.
   virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset,
-                                      GenericVector<UNICHAR_ID> *vec) const {
+                                      std::vector<UNICHAR_ID> *vec) const {
     (void)unichar_id;
     (void)unicharset;
     (void)vec;
@@ -355,15 +355,16 @@ struct DawgPosition {
   bool back_to_punc = false;
 };
 
-class DawgPositionVector : public GenericVector<DawgPosition> {
+class DawgPositionVector : public std::vector<DawgPosition> {
 public:
   /// Adds an entry for the given dawg_index with the given node to the vec.
   /// Returns false if the same entry already exists in the vector,
   /// true otherwise.
   inline bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg) {
-    for (int i = 0; i < size(); ++i) {
-      if (data_[i] == new_pos)
+    for (auto position : *this) {
+      if (position == new_pos) {
         return false;
+      }
     }
     push_back(new_pos);
     if (debug) {

diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp
@@ -201,19 +201,19 @@ void Dict::Load(const std::string &lang, TessdataManager *data_file) {
     punc_dawg_ =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_PUNC_DAWG, dawg_debug_level, data_file);
     if (punc_dawg_)
-      dawgs_ += punc_dawg_;
+      dawgs_.push_back(punc_dawg_);
   }
   if (load_system_dawg) {
     Dawg *system_dawg =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_SYSTEM_DAWG, dawg_debug_level, data_file);
     if (system_dawg)
-      dawgs_ += system_dawg;
+      dawgs_.push_back(system_dawg);
   }
   if (load_number_dawg) {
     Dawg *number_dawg =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_NUMBER_DAWG, dawg_debug_level, data_file);
     if (number_dawg)
-      dawgs_ += number_dawg;
+      dawgs_.push_back(number_dawg);
   }
   if (load_bigram_dawg) {
     bigram_dawg_ =
@@ -225,13 +225,13 @@ void Dict::Load(const std::string &lang, TessdataManager *data_file) {
     freq_dawg_ =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_FREQ_DAWG, dawg_debug_level, data_file);
     if (freq_dawg_)
-      dawgs_ += freq_dawg_;
+      dawgs_.push_back(freq_dawg_);
   }
   if (load_unambig_dawg) {
     unambig_dawg_ =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_UNAMBIG_DAWG, dawg_debug_level, data_file);
     if (unambig_dawg_)
-      dawgs_ += unambig_dawg_;
+      dawgs_.push_back(unambig_dawg_);
   }
 
   std::string name;
@@ -249,7 +249,7 @@ void Dict::Load(const std::string &lang, TessdataManager *data_file) {
       tprintf("Error: failed to load %s\n", name.c_str());
       delete trie_ptr;
     } else {
-      dawgs_ += trie_ptr;
+      dawgs_.push_back(trie_ptr);
     }
   }
 
@@ -267,13 +267,13 @@ void Dict::Load(const std::string &lang, TessdataManager *data_file) {
       tprintf("Error: failed to load %s\n", name.c_str());
       delete trie_ptr;
     } else {
-      dawgs_ += trie_ptr;
+      dawgs_.push_back(trie_ptr);
     }
   }
 
   document_words_ =
       new Trie(DAWG_TYPE_WORD, lang, DOC_DAWG_PERM, getUnicharset().size(), dawg_debug_level);
-  dawgs_ += document_words_;
+  dawgs_.push_back(document_words_);
 
   // This dawg is temporary and should not be searched by letter_is_ok.
   pending_words_ =
@@ -287,19 +287,19 @@ void Dict::LoadLSTM(const std::string &lang, TessdataManager *data_file) {
     punc_dawg_ =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_PUNC_DAWG, dawg_debug_level, data_file);
     if (punc_dawg_)
-      dawgs_ += punc_dawg_;
+      dawgs_.push_back(punc_dawg_);
   }
   if (load_system_dawg) {
     Dawg *system_dawg =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level, data_file);
     if (system_dawg)
-      dawgs_ += system_dawg;
+      dawgs_.push_back(system_dawg);
   }
   if (load_number_dawg) {
     Dawg *number_dawg =
         dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level, data_file);
     if (number_dawg)
-      dawgs_ += number_dawg;
+      dawgs_.push_back(number_dawg);
   }
 
   // stolen from Dict::Load (but needs params_ from Tesseract
@@ -319,7 +319,7 @@ void Dict::LoadLSTM(const std::string &lang, TessdataManager *data_file) {
       tprintf("Error: failed to load %s\n", name.c_str());
       delete trie_ptr;
     } else {
-      dawgs_ += trie_ptr;
+      dawgs_.push_back(trie_ptr);
     }
   }
 
@@ -337,7 +337,7 @@ void Dict::LoadLSTM(const std::string &lang, TessdataManager *data_file) {
       tprintf("Error: failed to load %s\n", name.c_str());
       delete trie_ptr;
     } else {
-      dawgs_ += trie_ptr;
+      dawgs_.push_back(trie_ptr);
     }
   }
 }
@@ -358,9 +358,9 @@ bool Dict::FinishLoad() {
       const Dawg *other = dawgs_[j];
       if (dawg != nullptr && other != nullptr && (dawg->lang() == other->lang()) &&
           kDawgSuccessors[dawg->type()][other->type()])
-        *lst += j;
+        lst->push_back(j);
     }
-    successors_ += lst;
+    successors_.push_back(lst);
   }
   return true;
 }
@@ -378,7 +378,9 @@ void Dict::End() {
     delete dawg_cache_;
     dawg_cache_ = nullptr;
   }
-  successors_.delete_data_pointers();
+  for (auto successor : successors_) {
+    delete successor;
+  }
   dawgs_.clear();
   successors_.clear();
   document_words_ = nullptr;
@@ -550,7 +552,7 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, UNICHA
   NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
   // Try to find the edge corresponding to the exact unichar_id and to all the
   // edges corresponding to the character class of unichar_id.
-  GenericVector<UNICHAR_ID> unichar_id_patterns;
+  std::vector<UNICHAR_ID> unichar_id_patterns;
   unichar_id_patterns.push_back(unichar_id);
   dawg->unichar_id_to_patterns(unichar_id, getUnicharset(), &unichar_id_patterns);
   for (int i = 0; i < unichar_id_patterns.size(); ++i) {
@@ -605,12 +607,12 @@ void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, bool suppress_pattern
       int dawg_ty = dawgs_[i]->type();
       bool subsumed_by_punc = kDawgSuccessors[DAWG_TYPE_PUNCTUATION][dawg_ty];
       if (dawg_ty == DAWG_TYPE_PUNCTUATION) {
-        *dawg_pos_vec += DawgPosition(-1, NO_EDGE, i, NO_EDGE, false);
+        dawg_pos_vec->push_back(DawgPosition(-1, NO_EDGE, i, NO_EDGE, false));
         if (dawg_debug_level >= 3) {
           tprintf("Adding beginning punc dawg [%d, " REFFORMAT "]\n", i, NO_EDGE);
         }
       } else if (!punc_dawg_available || !subsumed_by_punc) {
-        *dawg_pos_vec += DawgPosition(i, NO_EDGE, -1, NO_EDGE, false);
+        dawg_pos_vec->push_back(DawgPosition(i, NO_EDGE, -1, NO_EDGE, false));
         if (dawg_debug_level >= 3) {
           tprintf("Adding beginning dawg [%d, " REFFORMAT "]\n", i, NO_EDGE);
         }

diff --git a/src/dict/dict.h b/src/dict/dict.h
@@ -54,7 +54,7 @@ struct CHAR_FRAGMENT_INFO {
   float certainty;
 };
 
-using DawgVector = GenericVector<Dawg *>;
+using DawgVector = std::vector<Dawg *>;
 
 //
 // Constants
@@ -495,7 +495,7 @@ class TESS_API Dict {
   // matching.  The first member of each list is taken as canonical.  For
   // example, the first list contains hyphens and dashes with the first symbol
   // being the ASCII hyphen minus.
-  std::vector<GenericVector<UNICHAR_ID>> equivalent_symbols_;
+  std::vector<std::vector<UNICHAR_ID>> equivalent_symbols_;
   // Dawg Cache reference - this is who we ask to allocate/deallocate dawgs.
   DawgCache *dawg_cache_;
   bool dawg_cache_is_ours_; // we should delete our own dawg_cache_

diff --git a/src/dict/stopper.h b/src/dict/stopper.h
@@ -2,7 +2,6 @@
  ** Filename:    stopper.h
  ** Purpose:     Stopping criteria for word classifier.
  ** Author:      Dan Johnson
- ** History:     Wed May  1 09:42:57 1991, DSJ, Created.
  **
  ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,7 +21,6 @@
 #include "ratngs.h"
 
 #include <tesseract/unichar.h>
-#include "genericvector.h"
 
 namespace tesseract {
 
@@ -46,7 +44,7 @@ struct DANGERR_INFO {
   UNICHAR_ID leftmost; // in the replacement, what's the leftmost character?
 };
 
-using DANGERR = GenericVector<DANGERR_INFO>;
+using DANGERR = std::vector<DANGERR_INFO>;
 
 } // namespace tesseract
 

diff --git a/src/dict/trie.cpp b/src/dict/trie.cpp
@@ -24,7 +24,6 @@
 
 #include "dawg.h"
 #include "dict.h"
-#include "genericvector.h"
 #include "helpers.h"
 #include "kdpair.h"
 
@@ -49,7 +48,9 @@ const char *Trie::get_reverse_policy_name(RTLReversePolicy reverse_policy) {
 
 // Reset the Trie to empty.
 void Trie::clear() {
-  nodes_.delete_data_pointers();
+  for (auto node : nodes_) {
+    delete node;
+  }
   nodes_.clear();
   root_back_freelist_.clear();
   num_edges_ = 0;
@@ -122,10 +123,11 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, in
   EDGE_RECORD edge_rec;
   link_edge(&edge_rec, node2, marker_flag, direction, word_end, unichar_id);
   if (node1 == 0 && direction == BACKWARD_EDGE && !root_back_freelist_.empty()) {
-    EDGE_INDEX edge_index = root_back_freelist_.pop_back();
+    EDGE_INDEX edge_index = root_back_freelist_.back();
+    root_back_freelist_.pop_back();
     (*vec)[edge_index] = edge_rec;
   } else if (search_index < vec->size()) {
-    vec->insert(edge_rec, search_index);
+    vec->insert(vec->begin() + search_index, edge_rec);
   } else {
     vec->push_back(edge_rec);
   }
@@ -153,7 +155,7 @@ void Trie::add_word_ending(EDGE_RECORD *edge_ptr, NODE_REF the_next_node, bool m
   *edge_ptr |= (WERD_END_FLAG << flag_start_bit_);
 }
 
-bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const GenericVector<bool> *repetitions) {
+bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *repetitions) {
   if (word.length() <= 0)
     return false; // can't add empty words
   if (repetitions != nullptr)
@@ -330,7 +332,7 @@ void Trie::initialize_patterns(UNICHARSET *unicharset) {
 }
 
 void Trie::unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset,
-                                  GenericVector<UNICHAR_ID> *vec) const {
+                                  std::vector<UNICHAR_ID> *vec) const {
   bool is_alpha = unicharset.get_isalpha(unichar_id);
   if (is_alpha) {
     vec->push_back(alpha_pattern_);
@@ -388,7 +390,7 @@ bool Trie::read_pattern_list(const char *filename, const UNICHARSET &unicharset)
     // Parse the pattern and construct a unichar id vector.
     // Record the number of repetitions of each unichar in the parallel vector.
     WERD_CHOICE word(&unicharset);
-    GenericVector<bool> repetitions_vec;
+    std::vector<bool> repetitions_vec;
     const char *str_ptr = string;
     int step = unicharset.step(str_ptr);
     bool failed = false;
@@ -462,12 +464,12 @@ void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bo
     tprintf("\n");
   }
   if (direction == FORWARD_EDGE) {
-    nodes_[node1]->forward_edges.remove(edge_index);
+    nodes_[node1]->forward_edges.erase(nodes_[node1]->forward_edges.begin() + edge_index);
   } else if (node1 == 0) {
     KillEdge(&nodes_[node1]->backward_edges[edge_index]);
     root_back_freelist_.push_back(edge_index);
   } else {
-    nodes_[node1]->backward_edges.remove(edge_index);
+    nodes_[node1]->backward_edges.erase(nodes_[node1]->backward_edges.begin() + edge_index);
   }
   --num_edges_;
 }
@@ -476,7 +478,7 @@ void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bo
 // 1 Avoid insertion sorting or bubble sorting the tail root node
 //   (back links on node 0, a list of all the leaves.). The node is
 //   huge, and sorting it with n^2 time is terrible.
-// 2 Avoid using GenericVector::remove on the tail root node.
+// 2 Avoid using std::vector::erase on the tail root node.
 //   (a) During add of words to the trie, zero-out the unichars and
 //       keep a freelist of spaces to re-use.
 //   (b) During reduction, just zero-out the unichars of deleted back
@@ -624,13 +626,13 @@ void Trie::sort_edges(EDGE_VECTOR *edges) {
   int num_edges = edges->size();
   if (num_edges <= 1)
     return;
-  GenericVector<KDPairInc<UNICHAR_ID, EDGE_RECORD>> sort_vec;
+  std::vector<KDPairInc<UNICHAR_ID, EDGE_RECORD>> sort_vec;
   sort_vec.reserve(num_edges);
   for (int i = 0; i < num_edges; ++i) {
     sort_vec.push_back(
         KDPairInc<UNICHAR_ID, EDGE_RECORD>(unichar_id_from_edge_rec((*edges)[i]), (*edges)[i]));
   }
-  sort_vec.sort();
+  std::sort(sort_vec.begin(), sort_vec.end());
   for (int i = 0; i < num_edges; ++i)
     (*edges)[i] = sort_vec[i].data();
 }