Skip to content

Commit

Permalink
Add attributes to PDF structure tree nodes.
Browse files Browse the repository at this point in the history
Just like in HTML, some nodes in a PDF structure tree need not only the
tag name, but also some attributes.

This change adds some of the most generally useful attributes: alt text for
images, language codes for any element, and various table-related attributes.

Bug: 1039816
Change-Id: Ib9c8a768f594bdd61cf5848befa09af290dafdc5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2090621
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Eric Seckler <eseckler@chromium.org>
Auto-Submit: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: Eric Seckler <eseckler@chromium.org>
Cr-Commit-Position: refs/heads/master@{#749068}
  • Loading branch information
minorninth authored and Commit Bot committed Mar 11, 2020
1 parent 6f42f1b commit 5ff1eea
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 6 deletions.
6 changes: 6 additions & 0 deletions headless/lib/headless_web_contents_browsertest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,12 @@ const char kExpectedStructTreeJSON[] = R"({
} ]
} ]
} ]
}, {
"type": "Div",
"~children": [ {
"alt": "Car at the beach",
"type": "Figure"
} ]
} ]
}
)";
Expand Down
3 changes: 3 additions & 0 deletions headless/test/data/structured_doc.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@ <h1>Title</h1>
<td>Cell 2</td>
</tr>
</table>
<div>
<img src="svg_example_image.png" alt="Car at the beach">
</div>
</body>
14 changes: 10 additions & 4 deletions pdf/pdfium/pdfium_engine_exports.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,22 @@ bool IsValidPrintableArea(const gfx::Size& page_size,

base::Value RecursiveGetStructTree(FPDF_STRUCTELEMENT struct_elem) {
constexpr int kBufLen = 64;
base::char16 elem_type_buffer[kBufLen];
if (!FPDF_StructElement_GetType(struct_elem, elem_type_buffer,
sizeof(elem_type_buffer))) {
base::char16 str_buffer[kBufLen];
if (!FPDF_StructElement_GetType(struct_elem, str_buffer,
sizeof(str_buffer))) {
return base::Value(base::Value::Type::NONE);
}

base::Value result(base::Value::Type::DICTIONARY);
base::string16 elem_type(elem_type_buffer);
base::string16 elem_type(str_buffer);
result.SetStringKey("type", elem_type);

if (FPDF_StructElement_GetAltText(struct_elem, str_buffer,
sizeof(str_buffer))) {
base::string16 alt_text(str_buffer);
result.SetStringKey("alt", alt_text);
}

int children_count = FPDF_StructElement_CountChildren(struct_elem);
if (children_count == 0)
return base::Value(base::Value::Type::NONE);
Expand Down
1 change: 1 addition & 0 deletions printing/common/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ source_set("common") {

deps = [
"//base",
"//printing/buildflags",
"//skia",
"//ui/accessibility",
]
Expand Down
76 changes: 74 additions & 2 deletions printing/common/metafile_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,35 @@
#include "printing/common/metafile_utils.h"

#include "base/time/time.h"
#include "printing/buildflags/buildflags.h"
#include "third_party/skia/include/core/SkCanvas.h"
#include "third_party/skia/include/core/SkPicture.h"
#include "third_party/skia/include/core/SkPictureRecorder.h"
#include "third_party/skia/include/core/SkTime.h"
#include "third_party/skia/include/docs/SkPDFDocument.h"
#include "ui/accessibility/ax_node.h"
#include "ui/accessibility/ax_node_data.h"
#include "ui/accessibility/ax_role_properties.h"
#include "ui/accessibility/ax_tree.h"
#include "ui/accessibility/ax_tree_update.h"

namespace {

#if BUILDFLAG(ENABLE_TAGGED_PDF)
// Attribute owner names are standardized in the PDF 32000-1:2008 spec,
// section 14.8.5.2. An owner works roughly like a "category" that groups the
// attributes attached to a structure node.
constexpr char kPDFTableAttributeOwner[] = "Table";

// Attribute names and values for tables, as listed in the PDF 32000-1:2008
// spec, section 14.8.5.7.
constexpr char kPDFTableCellColSpanAttribute[] = "ColSpan";
constexpr char kPDFTableCellHeadersAttribute[] = "Headers";
constexpr char kPDFTableCellRowSpanAttribute[] = "RowSpan";
constexpr char kPDFTableHeaderScopeAttribute[] = "Scope";
constexpr char kPDFTableHeaderScopeColumn[] = "Column";
constexpr char kPDFTableHeaderScopeRow[] = "Row";
#endif // BUILDFLAG(ENABLE_TAGGED_PDF)

SkTime::DateTime TimeToSkTime(base::Time time) {
base::Time::Exploded exploded;
time.UTCExplode(&exploded);
Expand Down Expand Up @@ -46,6 +63,7 @@ sk_sp<SkPicture> GetEmptyPicture() {
// have enough data to build a valid tree.
bool RecursiveBuildStructureTree(const ui::AXNode* ax_node,
SkPDF::StructureElementNode* tag) {
#if BUILDFLAG(ENABLE_TAGGED_PDF)
bool valid = false;

tag->fNodeId = ax_node->GetIntAttribute(ax::mojom::IntAttribute::kDOMNodeId);
Expand Down Expand Up @@ -79,16 +97,46 @@ bool RecursiveBuildStructureTree(const ui::AXNode* ax_node,
tag->fType = SkPDF::DocumentStructureType::kTR;
break;
case ax::mojom::Role::kColumnHeader:
tag->fType = SkPDF::DocumentStructureType::kTH;
tag->fAttributes.appendString(kPDFTableAttributeOwner,
kPDFTableHeaderScopeAttribute,
kPDFTableHeaderScopeColumn);
break;
case ax::mojom::Role::kRowHeader:
tag->fType = SkPDF::DocumentStructureType::kTH;
tag->fAttributes.appendString(kPDFTableAttributeOwner,
kPDFTableHeaderScopeAttribute,
kPDFTableHeaderScopeRow);
break;
case ax::mojom::Role::kCell:
case ax::mojom::Role::kCell: {
tag->fType = SkPDF::DocumentStructureType::kTD;

// Append an attribute consisting of the string IDs of all of the
// header cells that correspond to this table cell.
std::vector<ui::AXNode*> header_nodes;
ax_node->GetTableCellColHeaders(&header_nodes);
ax_node->GetTableCellRowHeaders(&header_nodes);
std::vector<SkString> header_id_strs;
header_id_strs.reserve(header_nodes.size());
for (ui::AXNode* header_node : header_nodes) {
int node_id =
header_node->GetIntAttribute(ax::mojom::IntAttribute::kDOMNodeId);
header_id_strs.push_back(
SkString(base::NumberToString(node_id).c_str()));
}
tag->fAttributes.appendStringArray(kPDFTableAttributeOwner,
kPDFTableCellHeadersAttribute,
header_id_strs);
break;
}
case ax::mojom::Role::kFigure:
case ax::mojom::Role::kImage:
case ax::mojom::Role::kImage: {
tag->fType = SkPDF::DocumentStructureType::kFigure;
std::string alt =
ax_node->GetStringAttribute(ax::mojom::StringAttribute::kName);
tag->fAlt = SkString(alt.c_str());
break;
}
case ax::mojom::Role::kStaticText:
// Currently we're only marking text content, so we can't generate
// a nonempty structure tree unless we have at least one kStaticText
Expand All @@ -100,6 +148,27 @@ bool RecursiveBuildStructureTree(const ui::AXNode* ax_node,
tag->fType = SkPDF::DocumentStructureType::kNonStruct;
}

if (ui::IsCellOrTableHeader(ax_node->data().role)) {
base::Optional<int> row_span = ax_node->GetTableCellRowSpan();
if (row_span.has_value()) {
tag->fAttributes.appendInt(kPDFTableAttributeOwner,
kPDFTableCellRowSpanAttribute,
row_span.value());
}
base::Optional<int> col_span = ax_node->GetTableCellColSpan();
if (col_span.has_value()) {
tag->fAttributes.appendInt(kPDFTableAttributeOwner,
kPDFTableCellColSpanAttribute,
col_span.value());
}
}

std::string lang = ax_node->GetLanguage();
std::string parent_lang =
ax_node->parent() ? ax_node->parent()->GetLanguage() : "";
if (!lang.empty() && lang != parent_lang)
tag->fLang = lang.c_str();

size_t children_count = ax_node->GetUnignoredChildCount();
tag->fChildVector.resize(children_count);
for (size_t i = 0; i < children_count; i++) {
Expand All @@ -112,6 +181,9 @@ bool RecursiveBuildStructureTree(const ui::AXNode* ax_node,
}

return valid;
#else // BUILDFLAG(ENABLE_TAGGED_PDF)
return false;
#endif
}

} // namespace
Expand Down

0 comments on commit 5ff1eea

Please sign in to comment.