Add DocxDocument interface and refactor comment utilities

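Introduced the `DocxDocument` interface for common document operations and refactored comment-related utilities for improved modularity. The deprecated `document()`, `commentsPart()`, and `streamParts(String)` declarations were removed from `DocxPart` and are now defined in `DocxDocument`, which `DocxPart` extends; `streamParagraphs()` remains on `DocxPart` and is no longer marked deprecated. The `CommentCollectorWalker` class was added to handle comment collection, further simplifying `CommentUtil`; `CommentProcessorRegistry` now obtains its comments through `CommentCollectorWalker.collectComments` instead of `CommentUtil.getComments`.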
verronpro · Sep 22, 2024 · 20a29b9
1 parent 74c7dc7
commit 20a29b9
Show file tree

Hide file tree

Showing 8 changed files with 215 additions and 204 deletions.
diff --git a/engine/src/main/java/pro/verron/officestamper/api/DocxDocument.java b/engine/src/main/java/pro/verron/officestamper/api/DocxDocument.java
@@ -0,0 +1,12 @@
+package pro.verron.officestamper.api;
+
+import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
+
+import java.util.stream.Stream;
+
+public interface DocxDocument {
+    WordprocessingMLPackage document();
+    Stream<DocxPart> streamParts(String type);
+    CommentsPart commentsPart();
+}
diff --git a/engine/src/main/java/pro/verron/officestamper/api/DocxPart.java b/engine/src/main/java/pro/verron/officestamper/api/DocxPart.java
@@ -1,30 +1,16 @@
 package pro.verron.officestamper.api;
 
-import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
 import org.docx4j.openpackaging.parts.Part;
-import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
 import org.docx4j.wml.ContentAccessor;
 import org.docx4j.wml.P;
 
 import java.util.List;
 import java.util.stream.Stream;
 
-public interface DocxPart {
-    DocxPart from(ContentAccessor accessor);
-
+public interface DocxPart
+        extends DocxDocument {
     Part part();
-
+    DocxPart from(ContentAccessor accessor);
     List<Object> content();
-
-    @Deprecated(since = "2.5")
-    WordprocessingMLPackage document();
-
-    @Deprecated(since = "2.5")
-    CommentsPart commentsPart();
-
-    @Deprecated(since = "2.5")
     Stream<P> streamParagraphs();
-
-    @Deprecated(since = "2.5")
-    Stream<DocxPart> streamParts(String header);
 }
diff --git a/engine/src/main/java/pro/verron/officestamper/api/OfficeStamperConfiguration.java b/engine/src/main/java/pro/verron/officestamper/api/OfficeStamperConfiguration.java
@@ -1,16 +1,16 @@
 package pro.verron.officestamper.api;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.springframework.expression.spel.SpelParserConfiguration;
 
 import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 
-public interface OfficeStamperConfiguration {
-    Logger logger = LoggerFactory.getLogger(OfficeStamperConfiguration.class);
 
+/**
+ * Interface representing the configuration for the Office Stamper functionality.
+ */
+public interface OfficeStamperConfiguration {
     /**
      * Checks if the failOnUnresolvedExpression flag is set to true or false.
      *

diff --git a/engine/src/main/java/pro/verron/officestamper/core/CommentCollectorWalker.java b/engine/src/main/java/pro/verron/officestamper/core/CommentCollectorWalker.java
@@ -0,0 +1,150 @@
+package pro.verron.officestamper.core;
+
+import org.docx4j.TextUtils;
+import org.docx4j.wml.CommentRangeEnd;
+import org.docx4j.wml.CommentRangeStart;
+import org.docx4j.wml.R;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pro.verron.officestamper.api.Comment;
+import pro.verron.officestamper.api.DocxPart;
+import pro.verron.officestamper.api.OfficeStamperException;
+
+import java.math.BigInteger;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static java.util.stream.Collectors.toSet;
+
+class CommentCollectorWalker
+        extends BaseDocumentWalker {
+    private static final Logger logger = LoggerFactory.getLogger(CommentCollectorWalker.class);
+    private final DocxPart document;
+    private final Map<BigInteger, Comment> allComments;
+    private final Queue<Comment> stack;
+    private final Map<BigInteger, Comment> rootComments;
+
+    private CommentCollectorWalker(
+            DocxPart document,
+            Map<BigInteger, Comment> rootComments,
+            Map<BigInteger, Comment> allComments
+    ) {
+        super(document);
+        this.document = document;
+        this.allComments = allComments;
+        this.stack = Collections.asLifoQueue(new ArrayDeque<>());
+        this.rootComments = rootComments;
+    }
+
+    static Map<BigInteger, Comment> collectComments(DocxPart docxPart) {
+        var rootComments = new HashMap<BigInteger, Comment>();
+        var allComments = new HashMap<BigInteger, Comment>();
+        new CommentCollectorWalker(docxPart, rootComments, allComments).walk();
+
+        var commentsPart = docxPart.commentsPart();
+        if (commentsPart == null)
+            return rootComments;
+        var comments = CommentUtil.getComments(commentsPart);
+
+        for (var comment : comments) {
+            var commentWrapper = allComments.get(comment.getId());
+            if (commentWrapper != null)
+                commentWrapper.setComment(comment);
+        }
+        return cleanMalformedComments(rootComments);
+    }
+
+    private static Map<BigInteger, Comment> cleanMalformedComments(Map<BigInteger, Comment> rootComments) {
+        Map<BigInteger, Comment> filteredCommentEntries = new HashMap<>();
+
+        rootComments.forEach((key, comment) -> {
+            if (isCommentMalformed(comment)) {
+                var commentContent = getCommentContent(comment);
+                logger.error("Skipping malformed comment, missing range start and/or range end : {}", commentContent);
+            }
+            else {
+                filteredCommentEntries.put(key, comment);
+                comment.setChildren(cleanMalformedComments(comment.getChildren()));
+            }
+        });
+        return filteredCommentEntries;
+    }
+
+    private static Set<Comment> cleanMalformedComments(Set<Comment> children) {
+        return children
+                .stream()
+                .filter(comment -> {
+                    if (isCommentMalformed(comment)) {
+                        var commentContent = getCommentContent(comment);
+                        logger.error("Skipping malformed comment, missing range start and/or range end : {}",
+                                commentContent);
+                        return false;
+                    }
+                    comment.setChildren(cleanMalformedComments(comment.getChildren()));
+                    return true;
+                })
+                .collect(toSet());
+    }
+
+    private static boolean isCommentMalformed(Comment comment) {
+        return comment.getCommentRangeStart() == null
+               || comment.getCommentRangeEnd() == null
+               || comment.getComment() == null;
+    }
+
+    private static String getCommentContent(Comment comment) {
+        return comment.getComment() == null
+                ? "<no content>"
+                : comment.getComment()
+                         .getContent()
+                         .stream()
+                         .map(TextUtils::getText)
+                         .collect(Collectors.joining(""));
+    }
+
+    @Override
+    protected void onCommentRangeStart(CommentRangeStart commentRangeStart) {
+        Comment comment = allComments.get(commentRangeStart.getId());
+        if (comment == null) {
+            comment = new StandardComment(document.document());
+            allComments.put(commentRangeStart.getId(), comment);
+            if (stack.isEmpty()) {
+                rootComments.put(commentRangeStart.getId(),
+                        comment);
+            }
+            else {
+                stack.peek()
+                     .getChildren()
+                     .add(comment);
+            }
+        }
+        comment.setCommentRangeStart(commentRangeStart);
+        stack.add(comment);
+    }
+
+    @Override
+    protected void onCommentRangeEnd(CommentRangeEnd commentRangeEnd) {
+        Comment comment = allComments.get(commentRangeEnd.getId());
+        if (comment == null)
+            throw new OfficeStamperException("Found a comment range end before the comment range start !");
+
+        comment.setCommentRangeEnd(commentRangeEnd);
+
+        if (stack.isEmpty()) return;
+
+        var peek = stack.peek();
+        if (peek.equals(comment))
+            stack.remove();
+        else throw new OfficeStamperException("Cannot figure which comment contains the other !");
+    }
+
+    @Override
+    protected void onCommentReference(R.CommentReference commentReference) {
+        Comment comment = allComments.get(commentReference.getId());
+        if (comment == null) {
+            comment = new StandardComment(document.document());
+            allComments.put(commentReference.getId(), comment);
+        }
+        comment.setCommentReference(commentReference);
+    }
+}
diff --git a/engine/src/main/java/pro/verron/officestamper/core/CommentProcessorRegistry.java b/engine/src/main/java/pro/verron/officestamper/core/CommentProcessorRegistry.java
@@ -18,8 +18,8 @@
 import java.math.BigInteger;
 import java.util.*;
 
+import static pro.verron.officestamper.core.CommentCollectorWalker.collectComments;
 import static pro.verron.officestamper.core.CommentUtil.getCommentString;
-import static pro.verron.officestamper.core.CommentUtil.getComments;
 
 /**
  * Allows registration of {@link CommentProcessor} objects. Each registered
@@ -70,7 +70,7 @@ public <T> void runProcessors(T expressionContext) {
               .filter(R.class::isInstance)
               .map(R.class::cast)
               .forEach(run -> {
-                  var comments = getComments(source);
+                  var comments = collectComments(source);
                   var runParent = (P) run.getParent();
                   var optional = runProcessorsOnRunComment(comments, expressionContext, run, runParent);
                   if (optional.isPresent()) {
@@ -87,7 +87,7 @@ public <T> void runProcessors(T expressionContext) {
         source.streamParagraphs()
               .forEach(p -> {
                   var document = source.document();
-                  var comments = getComments(source);
+                  var comments = collectComments(source);
                   var optional = runProcessorsOnParagraphComment(document, comments, expressionContext, p);
                   if (optional.isPresent()) {
                       for (Object processor : commentProcessors.values()) {