Skip to content

Commit

Permalink
Fix createDocumentsWithResults for multi-page image (Issue #233)
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenq committed Jul 22, 2022
1 parent 0259930 commit e6b0707
Show file tree
Hide file tree
Showing 8 changed files with 470 additions and 340 deletions.
508 changes: 254 additions & 254 deletions pom.xml

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions src/main/java/net/sourceforge/tess4j/ITesseract.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public interface ITesseract {
+ "</head>\n<body>\n";
String htmlEndTag = "</body>\n</html>\n";

/**
* Name of the Tesseract variable that holds the page separator string.
*/
String PAGE_SEPARATOR = "page_separator";

/**
* Rendered formats supported by Tesseract.
*/
Expand Down Expand Up @@ -184,12 +186,11 @@ public enum RenderedFormat {
* <code>tessedit_char_whitelist</code>, etc.
* @param value value for corresponding variable, e.g., "1", "0",
* "0123456789", etc.
* @deprecated Use {@link #setVariable(String, String)} instead.
*/
@Deprecated
void setTessVariable(String key, String value);

/**
* Sets the value of Tesseract's internal parameter.
*
Expand Down Expand Up @@ -300,4 +301,13 @@ public enum RenderedFormat {
* @return list of <code>Word</code>
*/
List<Word> getWords(BufferedImage bi, int pageIteratorLevel);

/**
* Gets recognized words at specified page iterator level from a list of
* images.
*
* @param biList list of input buffered images
* @param pageIteratorLevel TessPageIteratorLevel enum
* @return list of <code>Word</code>
*/
List<Word> getWords(List<BufferedImage> biList, int pageIteratorLevel);
}
136 changes: 97 additions & 39 deletions src/main/java/net/sourceforge/tess4j/Tesseract.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@
* <br>
* Any program that uses the library will need to ensure that the required
* libraries (the <code>.jar</code> files for <code>jna</code> and
* <code>jai-imageio</code>) are in its compile and run-time
* <code>classpath</code>.
*/
public class Tesseract implements ITesseract {

Expand All @@ -72,6 +72,8 @@ public class Tesseract implements ITesseract {
private TessAPI api;
private TessBaseAPI handle;

// Set to true by getRecognizedWords(File, int) while it delegates to
// getWords(List, int), so that getWords skips its own init()/setVariables()
// and dispose() calls; reset to false once that call returns.
private boolean alreadyInvoked;

private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());

public Tesseract() {
Expand Down Expand Up @@ -161,8 +163,7 @@ public void setHocr(boolean hocr) {
* <code>tessedit_char_whitelist</code>, etc.
* @param value value for corresponding variable, e.g., "1", "0",
* "0123456789", etc.
* @deprecated Use {@link #setVariable(String, String)} instead.
*/
@Override
@Deprecated
Expand Down Expand Up @@ -772,45 +773,71 @@ public List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLev
*/
@Override
public List<Word> getWords(BufferedImage bi, int pageIteratorLevel) {
    // Delegate to the list overload, which performs its own init(),
    // setVariables() and dispose(). Initializing here as well would set
    // the engine up twice for a single call.
    return getWords(Arrays.asList(bi), pageIteratorLevel);
}

/**
* Gets recognized words at specified page iterator level.
*
* @param biList list of input buffered image
* @param pageIteratorLevel TessPageIteratorLevel enum
* @return list of <code>Word</code>
*/
@Override
public List<Word> getWords(List<BufferedImage> biList, int pageIteratorLevel) {
if (!alreadyInvoked) {
this.init();
this.setVariables();
}

String pageSeparator = api.TessBaseAPIGetStringVariable(handle, PAGE_SEPARATOR);
List<Word> words = new ArrayList<Word>();

try {
setImage(bi, null);
for (BufferedImage bi : biList) {
setImage(bi, null);

api.TessBaseAPIRecognize(handle, null);
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
api.TessPageIteratorBegin(pi);

do {
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
if (ptr == null) {
continue;
}
String text = ptr.getString(0);
api.TessDeleteText(ptr);
float confidence = api.TessResultIteratorConfidence(ri, pageIteratorLevel);
IntBuffer leftB = IntBuffer.allocate(1);
IntBuffer topB = IntBuffer.allocate(1);
IntBuffer rightB = IntBuffer.allocate(1);
IntBuffer bottomB = IntBuffer.allocate(1);
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
int left = leftB.get();
int top = topB.get();
int right = rightB.get();
int bottom = bottomB.get();
Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top));
words.add(word);
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
// api.TessPageIteratorDelete(pi);
api.TessResultIteratorDelete(ri);

api.TessBaseAPIRecognize(handle, null);
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
api.TessPageIteratorBegin(pi);
words.add(new Word(pageSeparator, 100, new Rectangle())); // add page separator
}

do {
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
if (ptr == null) {
continue;
}
String text = ptr.getString(0);
api.TessDeleteText(ptr);
float confidence = api.TessResultIteratorConfidence(ri, pageIteratorLevel);
IntBuffer leftB = IntBuffer.allocate(1);
IntBuffer topB = IntBuffer.allocate(1);
IntBuffer rightB = IntBuffer.allocate(1);
IntBuffer bottomB = IntBuffer.allocate(1);
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
int left = leftB.get();
int top = topB.get();
int right = rightB.get();
int bottom = bottomB.get();
Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top));
words.add(word);
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
// api.TessPageIteratorDelete(pi);
api.TessResultIteratorDelete(ri);
// remove last page separator
if (!words.isEmpty()) {
words.remove(words.size() - 1);
}
} catch (Exception e) {
logger.warn(e.getMessage(), e);
} finally {
dispose();
if (!alreadyInvoked) {
dispose();
}
}

return words;
Expand Down Expand Up @@ -866,9 +893,9 @@ public List<OCRResult> createDocumentsWithResults(BufferedImage[] bis, String[]
try {
TessResultRenderer renderer = createRenderers(outputbases[i], formats);
int meanTextConfidence = createDocuments(bis[i], filenames[i], renderer);
api.TessDeleteResultRenderer(renderer);
List<Word> words = meanTextConfidence > 0 ? getRecognizedWords(pageIteratorLevel) : new ArrayList<Word>();
results.add(new OCRResult(meanTextConfidence, words));
api.TessDeleteResultRenderer(renderer);
} catch (Exception e) {
// skip the problematic image file
logger.warn(e.getMessage(), e);
Expand Down Expand Up @@ -934,9 +961,9 @@ public List<OCRResult> createDocumentsWithResults(String[] filenames, String[] o

TessResultRenderer renderer = createRenderers(outputbases[i], formats);
int meanTextConfidence = createDocuments(imageFile.getPath(), renderer);
List<Word> words = meanTextConfidence > 0 ? getRecognizedWords(pageIteratorLevel) : new ArrayList<Word>();
results.add(new OCRResult(meanTextConfidence, words));
api.TessDeleteResultRenderer(renderer);
List<Word> words = meanTextConfidence > 0 ? getRecognizedWords(imageFile, pageIteratorLevel) : new ArrayList<Word>();
results.add(new OCRResult(meanTextConfidence, words));
} catch (Exception e) {
// skip the problematic image file
logger.warn(e.getMessage(), e);
Expand All @@ -955,7 +982,8 @@ public List<OCRResult> createDocumentsWithResults(String[] filenames, String[] o
}

/**
* Gets result words at specified page iterator level from a recognized
* page.
*
* @param pageIteratorLevel TessPageIteratorLevel enum
* @return list of <code>Word</code>
Expand All @@ -972,7 +1000,7 @@ private List<Word> getRecognizedWords(int pageIteratorLevel) {
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
if (ptr == null) {
continue;
}
}
String text = ptr.getString(0);
api.TessDeleteText(ptr);
float confidence = api.TessResultIteratorConfidence(ri, pageIteratorLevel);
Expand All @@ -997,6 +1025,36 @@ private List<Word> getRecognizedWords(int pageIteratorLevel) {
return words;
}

/**
 * Collects the words recognized in an image file at the given page iterator
 * level. A single-page file reuses the results of the recognition that has
 * already run; a multi-page file is recognized again page by page, which
 * doubles the processing time.
 *
 * @param inputFile image file whose pages are read
 * @param pageIteratorLevel TessPageIteratorLevel enum
 * @return list of <code>Word</code>; empty if the file yields no images or
 * cannot be read
 */
private List<Word> getRecognizedWords(File inputFile, int pageIteratorLevel) {
    try {
        List<BufferedImage> pages = ImageIOHelper.getImageList(inputFile);
        int pageCount = pages.size();

        if (pageCount == 0) {
            return new ArrayList<Word>();
        }
        if (pageCount == 1) {
            return getRecognizedWords(pageIteratorLevel);
        }

        // Tell getWords that the engine is already set up, so it neither
        // re-initializes nor disposes it.
        alreadyInvoked = true;
        return getWords(pages, pageIteratorLevel);
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
        return new ArrayList<Word>();
    } finally {
        // Always restore the default so later getWords calls manage the
        // engine themselves.
        alreadyInvoked = false;
    }
}

/**
* Releases all of the native resources used by this instance.
*/
Expand Down
Loading

0 comments on commit e6b0707

Please sign in to comment.