Skip to content

Commit

Permalink
Add support for inlineTags extraAttributes.
Browse files Browse the repository at this point in the history
This allows you to use XPath to add attributes to inline tags,
in addition to the ones they already have in the document.

See how-to-configure-indexing.md for details.
  • Loading branch information
jan-niestadt committed Jun 4, 2024
1 parent d8801f5 commit 53b9f13
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,39 @@
*/
public class ConfigInlineTag {

/**
 * One extra attribute to index with an inline tag: the attribute's name
 * together with the XPath expression that yields its value.
 */
public static class ConfigExtraAttribute {
    /** Name under which the attribute is indexed */
    private String name;
    /** XPath expression producing the attribute's value */
    private String valuePath;

    /** Creates an extra attribute whose name and value path are both unset (null). */
    public ConfigExtraAttribute() {
        // intentionally empty
    }

    /**
     * Creates a fully specified extra attribute.
     *
     * @param name attribute name
     * @param valuePath XPath to get the attribute's value
     */
    public ConfigExtraAttribute(String name, String valuePath) {
        this.valuePath = valuePath;
        this.name = name;
    }

    /** @return the attribute name */
    public String getName() {
        return this.name;
    }

    /** @param name the attribute name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the XPath used to get the attribute's value */
    public String getValuePath() {
        return this.valuePath;
    }

    /** @param valuePath the XPath used to get the attribute's value */
    public void setValuePath(String valuePath) {
        this.valuePath = valuePath;
    }
}

/** XPath to the inline tag, relative to the container element */
private String path;

Expand All @@ -34,6 +67,9 @@ public class ConfigInlineTag {
/** If set: ignore excludeAttributes and don't index attributes not in this list. */
private List<String> includeAttributes = null;

/** Extra attributes to index with the tag via an XPath expression */
private List<ConfigExtraAttribute> extraAttributes = Collections.emptyList();

/** Creates an inline tag configuration; the empty body leaves every field at its declared initial value. */
public ConfigInlineTag() {
}

Expand Down Expand Up @@ -94,4 +130,16 @@ public void setIncludeAttributes(List<String> includeAttributes) {
/**
 * Returns the list of attributes to index for this tag.
 *
 * @return the include list; may be {@code null}, which is the field's initial value
 */
public List<String> getIncludeAttributes() {
return includeAttributes;
}

/**
 * Sets the extra attributes to index with this tag via XPath expressions.
 *
 * @param extraAttributes the extra attributes; {@code null} is treated as "no extra attributes"
 */
public void setExtraAttributes(List<ConfigExtraAttribute> extraAttributes) {
    // Never store null: hasDetailedAttributeConfig() calls isEmpty() on this list
    // and the indexer iterates over it, both without a null check.
    this.extraAttributes = extraAttributes == null
            ? Collections.<ConfigExtraAttribute>emptyList()
            : extraAttributes;
}

/**
 * Returns the extra attributes to index with this tag via XPath expressions.
 *
 * @return the extra attributes; the field is initialised to an empty list
 */
public List<ConfigExtraAttribute> getExtraAttributes() {
return extraAttributes;
}

/**
 * Tells whether this tag carries any attribute-level configuration: an include
 * list, a non-empty exclude list, or at least one extra attribute.
 *
 * @return true if any of those is configured
 */
public boolean hasDetailedAttributeConfig() {
    if (includeAttributes != null) {
        return true;
    }
    // Same short-circuit order as "!exclude.isEmpty() || !extra.isEmpty()".
    return !(excludeAttributes.isEmpty() && extraAttributes.isEmpty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,12 @@ private void handleInlineOpenTag(ConfigAnnotatedField annotatedField, Map<Span,
}
}
}
// Index any extra attributes using the provided XPath expressions.
for (ConfigInlineTag.ConfigExtraAttribute extraAttribute: currentInline.config.getExtraAttributes()) {
String value = xpathValue(extraAttribute.getValuePath(), nodeInfo);
if (value != null)
atts.put(extraAttribute.getName(), value);
}
inlineTag(nodeInfo.getDisplayName(), true, atts);

// Add tag to the list of tags to close at the correct position.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ private void collectPunctuation(ConfigAnnotatedField annotatedField, List<Inline
private void collectInlineTags(ConfigAnnotatedField annotatedField, List<InlineObject> inlineObjects) {
int i = 0;
for (ConfigInlineTag inlineTag : annotatedField.getInlineTags()) {
if (inlineTag.hasDetailedAttributeConfig())
warn("Detailed inline tag attribute configuration not supported in VTD indexer. Ignoring for tag: " +
inlineTag.getPath() + " (for support, add 'processor: saxon' to .blf.yaml file)");
// Collect the occurrences of this inline tag
String tokenIdXPath = inlineTag.getTokenIdPath();
// We want to capture token ids for this inline tag. Create the AutoPilot.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,9 @@ private void readInlineTags(Entry<String, JsonNode> itsEntry, ConfigAnnotatedFie
readStringList(e, exclAttr);
t.setExcludeAttributes(exclAttr);
break;
case "extraAttributes":
t.setExtraAttributes(readExtraAttributes(e));
break;
default:
throw new InvalidInputFormatConfig("Unknown key " + e.getKey() + " in inline tag " + t.getPath());
}
Expand All @@ -576,6 +579,40 @@ private void readInlineTags(Entry<String, JsonNode> itsEntry, ConfigAnnotatedFie
}
}

/**
 * Read the extra attributes to index by XPath for an inline tag.
 *
 * Each element of the array is read as an object with a "name" key and either
 * a "valuePath" key (an XPath expression, used as-is) or a "value" key (a fixed
 * string, converted with fixedStringToXpath). If both "value" and "valuePath"
 * are given, whichever is read last wins.
 *
 * @param e the "extraAttributes" entry of an inline tag configuration
 * @return the extra attributes, in the order the array elements were iterated
 * @throws InvalidInputFormatConfig if an element has an unknown key, no name,
 *         or neither "value" nor "valuePath"
 */
private List<ConfigInlineTag.ConfigExtraAttribute> readExtraAttributes(Entry<String, JsonNode> e) {
    List<ConfigInlineTag.ConfigExtraAttribute> extraAttr = new ArrayList<>();
    Iterator<JsonNode> itExtraAttr = array(e).elements();
    while (itExtraAttr.hasNext()) {
        JsonNode ea = itExtraAttr.next();
        ConfigInlineTag.ConfigExtraAttribute a = new ConfigInlineTag.ConfigExtraAttribute();
        Iterator<Entry<String, JsonNode>> itExtraAttrEntry = obj(ea, "extra attribute").fields();
        while (itExtraAttrEntry.hasNext()) {
            Entry<String, JsonNode> eea = itExtraAttrEntry.next();
            switch (eea.getKey()) {
            case "name":
                a.setName(str(eea));
                break;
            case "value":
                a.setValuePath(fixedStringToXpath(str(eea)));
                break;
            case "valuePath":
                a.setValuePath(str(eea));
                break;
            default:
                throw new InvalidInputFormatConfig(
                        "Unknown key " + eea.getKey() + " in extra attribute " + a.getName());
            }
        }
        // Without a name or a value XPath the attribute cannot be indexed. Reject it
        // here instead of passing a null name or XPath on to the indexer.
        if (a.getName() == null) {
            throw new InvalidInputFormatConfig("Extra attribute is missing required key 'name'");
        }
        if (a.getValuePath() == null) {
            throw new InvalidInputFormatConfig(
                    "Extra attribute " + a.getName() + " must specify either 'value' or 'valuePath'");
        }
        extraAttr.add(a);
    }
    return extraAttr;
}

private void readMetadata(Entry<String, JsonNode> mdEntry, ConfigInputFormat cfg) {
JsonNode node = mdEntry.getValue();
if (node instanceof ObjectNode) {
Expand Down
9 changes: 8 additions & 1 deletion site/docs/guide/how-to-configure-indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,14 @@ inlineTags:
- "xml:id"
- path: .//p
includeAttributes:
- "type" # Only index the type attribute
- "type" # Only index this tag's type attribute
# Any extra attributes to index with this tag using XPath
extraAttributes:
# if e.g. input is <p xml:id="par-12">...</p> , index number="12"
- name: "number"
valuePath: "substring-after(@xml:id, 'par-')"
- path: .//ne
displayAs: named-entity # what CSS class to use (when using autogenerated XSLT)
```
Expand Down

0 comments on commit 53b9f13

Please sign in to comment.