Skip to content

Commit

Permalink
Rotate images in rotated PDFs before sending them to OCR.
Browse files Browse the repository at this point in the history
If a PDF page is marked as rotated, the images on that page are rotated
before being sent to the OCR service.

Bug: 1469236
Change-Id: I82d8981df86f0d7224886765589ed10dec551338
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4735395
Commit-Queue: Ramin Halavati <rhalavati@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1180140}
  • Loading branch information
Ramin Halavati authored and Chromium LUCI CQ committed Aug 7, 2023
1 parent d096e54 commit 6e0174a
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 2 deletions.
24 changes: 22 additions & 2 deletions pdf/pdfium/pdfium_page.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "ui/gfx/geometry/vector2d.h"
#include "ui/gfx/geometry/vector2d_f.h"
#include "ui/gfx/range/range.h"
#include "ui/gfx/skbitmap_operations.h"

using printing::ConvertUnitFloat;
using printing::kPixelsPerInch;
Expand Down Expand Up @@ -1322,8 +1323,27 @@ void PDFiumPage::CalculateImages() {
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
const size_t row_bytes = FPDFBitmap_GetStride(bitmap.get());
SkPixmap pixels(info, FPDFBitmap_GetBuffer(bitmap.get()), row_bytes);
if (image.image_data.tryAllocPixels(info, row_bytes))
image.image_data.writePixels(pixels);
if (!image.image_data.tryAllocPixels(info, row_bytes)) {
continue;
}
image.image_data.writePixels(pixels);

SkBitmapOperations::RotationAmount rotation;
switch (FPDFPage_GetRotation(page)) {
case 0:
continue;
case 1:
rotation = SkBitmapOperations::RotationAmount::ROTATION_90_CW;
break;
case 2:
rotation = SkBitmapOperations::RotationAmount::ROTATION_180_CW;
break;
case 3:
rotation = SkBitmapOperations::RotationAmount::ROTATION_270_CW;
break;
}

image.image_data = SkBitmapOperations::Rotate(image.image_data, rotation);
}
}

Expand Down
1 change: 1 addition & 0 deletions pdf/pdfium/pdfium_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ class PDFiumPage {
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageData);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, RotatedPageImageData);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetUTF8LinkTarget);
Expand Down
17 changes: 17 additions & 0 deletions pdf/pdfium/pdfium_page_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,23 @@ TEST_P(PDFiumPageImageDataTest, ImageData) {
EXPECT_EQ(page.images_[1].image_data.height(), 50);
}

// Loads a single-page document whose page is rotated and checks the
// dimensions of the bitmap that CalculateImages() extracts for its image.
TEST_P(PDFiumPageImageDataTest, RotatedPageImageData) {
  TestClient test_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&test_client, FILE_PATH_LITERAL("rotated_page.pdf"));
  ASSERT_TRUE(pdf_engine);
  ASSERT_EQ(1, pdf_engine->GetNumberOfPages());

  PDFiumPage& rotated_page = GetPDFiumPageForTest(*pdf_engine, 0);
  rotated_page.CalculateImages();
  ASSERT_EQ(1u, rotated_page.images_.size());

  // The image is stored in the file as 100x25, but because the page is
  // rotated the extracted bitmap is expected to come out as 25x100.
  const auto& extracted_bitmap = rotated_page.images_[0].image_data;
  EXPECT_EQ(extracted_bitmap.width(), 25);
  EXPECT_EQ(extracted_bitmap.height(), 100);
}

INSTANTIATE_TEST_SUITE_P(All, PDFiumPageImageDataTest, testing::Bool());

using PDFiumPageTextTest = PDFiumTestBase;
Expand Down
53 changes: 53 additions & 0 deletions pdf/test/data/rotated_page.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{{header}}
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
>>
endobj
{{object 2 0}} <<
/Type /Pages
/MediaBox [0 0 200 200]
/Count 1
/Kids [3 0 R]
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Contents 4 0 R
/Rotate 270
/Resources <<
/XObject <<
/Img 5 0 R
>>
>>
>>
endobj
{{object 4 0}} <<
{{streamlen}}
>>
stream
q
30 -30 40 40 100 100 cm
/Img Do
Q
endstream
endobj
{{object 5 0}} <<
/Type /XObject
/Subtype /Image
/Width 100
/Height 25
/BitsPerComponent 8
/ColorSpace /DeviceRGB
/Filter [/ASCIIHexDecode /FlateDecode]
{{streamlen}}
>>
stream
789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
endstream
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF
Binary file added pdf/test/data/rotated_page.pdf
Binary file not shown.

0 comments on commit 6e0174a

Please sign in to comment.