From 7e93907e9c98270e76e20d55c4d35bd600edbb20 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 24 Oct 2022 03:01:56 +0100 Subject: [PATCH 1/2] add recursion limit of 500 for DTD parsing (#159) --- .../java/com/ctc/wstx/dtd/FullDTDReader.java | 35 +++++++-- src/test/java/wstxtest/BaseWstxTest.java | 23 ++++++ .../java/wstxtest/fuzz/Fuzz_DTDReadTest.java | 76 +++++++++++++++++++ ...se-modified-XmlFuzzer-5219006592450560.txt | 1 + 4 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java create mode 100644 src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt diff --git a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java index fd8a4d03..19f7ab8e 100644 --- a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java +++ b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java @@ -74,6 +74,9 @@ public class FullDTDReader final static Boolean ENTITY_EXP_PE = Boolean.TRUE; + final static int DEFAULT_DTD_RECURSION_DEPTH_LIMIT = 500; + static int DTD_RECURSION_DEPTH_LIMIT = DEFAULT_DTD_RECURSION_DEPTH_LIMIT; + /* /////////////////////////////////////////////////////////// // Configuration @@ -327,6 +330,24 @@ public class FullDTDReader transient TextBuffer mTextBuffer = null; + /** + * Sets the limit on how many times the code will recurse through DTD data. + * The default is 500. + * @param limit new limit on how many times the code will recurse through DTD data + */ + public static void setDtdRecursionDepthLimit(final int limit) { + DTD_RECURSION_DEPTH_LIMIT = limit; + } + + /** + * Gets the limit on how many times the code will recurse through DTD data. + * The default is 500. + * @return limit on how many times the code will recurse through DTD data + */ + public static int getDtdRecursionDepthLimit() { + return DTD_RECURSION_DEPTH_LIMIT; + } + /* /////////////////////////////////////////////////////////// // Life-cycle @@ -2271,7 +2292,7 @@ private void handleElementDecl() vldContent = XMLValidator.CONTENT_ALLOW_ANY_TEXT; // checked against DTD } else { --mInputPtr; // let's push it back... - ContentSpec spec = readContentSpec(elemName, true, mCfgFullyValidating); + ContentSpec spec = readContentSpec(elemName, mCfgFullyValidating, 0); val = spec.getSimpleValidator(); if (val == null) { val = new DFAValidator(DFAState.constructDFA(spec)); @@ -3049,13 +3070,13 @@ private StructValidator readMixedSpec(PrefixedName elemName, boolean construct) return val; } - /** - * @param mainLevel Whether this is the main-level content specification or nested - */ - private ContentSpec readContentSpec(PrefixedName elemName, boolean mainLevel, - boolean construct) + private ContentSpec readContentSpec(final PrefixedName elemName, final boolean construct, final int recursionDepth) throws XMLStreamException { + if (recursionDepth > DTD_RECURSION_DEPTH_LIMIT) { + throw new XMLStreamException("FullDTDReader has reached recursion depth limit of " + DTD_RECURSION_DEPTH_LIMIT); + } + ArrayList subSpecs = new ArrayList(); boolean isChoice = false; // default to sequence boolean choiceSet = false; @@ -3087,7 +3108,7 @@ private ContentSpec readContentSpec(PrefixedName elemName, boolean mainLevel, } } if (c == '(') { - ContentSpec cs = readContentSpec(elemName, false, construct); + ContentSpec cs = readContentSpec(elemName, construct, recursionDepth + 1); subSpecs.add(cs); continue; } diff --git a/src/test/java/wstxtest/BaseWstxTest.java b/src/test/java/wstxtest/BaseWstxTest.java index 8e8bc6f5..64cec6fd 100644 --- a/src/test/java/wstxtest/BaseWstxTest.java +++ b/src/test/java/wstxtest/BaseWstxTest.java @@ -494,6 +494,29 @@ protected static String getAndVerifyText(XMLStreamReader sr) return text; } + protected byte[] readResource(String ref) + { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + final byte[] buf = new byte[4000]; + + InputStream in = getClass().getResourceAsStream(ref); + if (in != null) { + try { + int len; + while ((len = in.read(buf)) > 0) { + bytes.write(buf, 0, len); + } + in.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to read resource '"+ref+"': "+e); + } + } + if (bytes.size() == 0) { + throw new IllegalArgumentException("Failed to read resource '"+ref+"': empty resource?"); + } + return bytes.toByteArray(); + } + /* ////////////////////////////////////////////////// // Debug/output helpers diff --git a/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java new file mode 100644 index 00000000..d784125e --- /dev/null +++ b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java @@ -0,0 +1,76 @@ +package wstxtest.fuzz; + +import com.ctc.wstx.dtd.FullDTDReader; +import com.ctc.wstx.exc.WstxLazyException; +import com.ctc.wstx.stax.WstxInputFactory; +import org.codehaus.stax2.io.Stax2ByteArraySource; +import wstxtest.stream.BaseStreamTest; + +import javax.xml.stream.XMLStreamReader; +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.io.Reader; + +public class Fuzz_DTDReadTest extends BaseStreamTest +{ + private final byte[] DOC = readResource("/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt"); + + private final WstxInputFactory STAX_F = getWstxInputFactory(); + + public void testIssueInputStream() throws Exception + { + XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC)); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } + + public void testIssueInputStreamHigherRecursionLimit() throws Exception + { + final int defaultLimit = FullDTDReader.getDtdRecursionDepthLimit(); + XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC)); + try { + FullDTDReader.setDtdRecursionDepthLimit(1000); + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 1000"); + } finally { + FullDTDReader.setDtdRecursionDepthLimit(defaultLimit); + } + sr.close(); + } + + public void testIssueReader() throws Exception + { + Reader r = new InputStreamReader(new ByteArrayInputStream(DOC), + "UTF-8"); + XMLStreamReader sr = STAX_F.createXMLStreamReader(r); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } + + public void testIssueStax2ByteArray() throws Exception + { + // Then "native" Byte array + Stax2ByteArraySource src = new Stax2ByteArraySource(DOC, 0, DOC.length); + XMLStreamReader sr = STAX_F.createXMLStreamReader(src); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } +} + diff --git a/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt b/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt new file mode 100644 index 00000000..1fb049bd --- /dev/null +++ b/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt @@ -0,0 +1 @@ + Date: Mon, 24 Oct 2022 16:26:50 -0700 Subject: [PATCH 2/2] Add release notes for #160, minor tweaks --- release-notes/VERSION | 4 ++- .../java/com/ctc/wstx/api/ReaderConfig.java | 27 ++++++++++++++- .../com/ctc/wstx/api/WstxInputProperties.java | 10 +++++- .../java/com/ctc/wstx/dtd/FullDTDReader.java | 30 +++------------- .../java/wstxtest/fuzz/Fuzz_DTDReadTest.java | 34 +++++++++++-------- 5 files changed, 62 insertions(+), 43 deletions(-) diff --git a/release-notes/VERSION b/release-notes/VERSION index 27008b36..b3847902 100644 --- a/release-notes/VERSION +++ b/release-notes/VERSION @@ -8,12 +8,14 @@ Project: woodstox #78: Shade MSV dependency -5.3.1 (not yet released) +5.4.0 (not yet released) #104: `NullPointerException` in `DTDValidator.validateElementEnd()` for element undefined in DTD (reported by ChrisTrenkamp@github) #105: W3CSchemaFactory constructor incorrectly references relaxng +#160: Add limit and configuration setting for maximum nesting for DTD subsets + (contributed by @pjfanning) 5.3.0 (15-Jul-2019) diff --git a/src/main/java/com/ctc/wstx/api/ReaderConfig.java b/src/main/java/com/ctc/wstx/api/ReaderConfig.java index df7588d5..83daa900 100644 --- a/src/main/java/com/ctc/wstx/api/ReaderConfig.java +++ b/src/main/java/com/ctc/wstx/api/ReaderConfig.java @@ -46,6 +46,9 @@ public final class ReaderConfig public final static int DEFAULT_MAX_ENTITY_DEPTH = 500; public final static int DEFAULT_MAX_ENTITY_COUNT = 100 * 1000; + // @since 5.4/6.4 + public final static int DEFAULT_MAX_DTD_DEPTH = 500; + /* /////////////////////////////////////////////////////////////////////// // Constants for reader properties: @@ -133,7 +136,9 @@ public final class ReaderConfig final static int PROP_MAX_TEXT_LENGTH = 66; final static int PROP_MAX_ENTITY_COUNT = 67; final static int PROP_MAX_ENTITY_DEPTH = 68; - + + final static int PROP_MAX_DTD_DEPTH = 69; + /* //////////////////////////////////////////////// // Limits for numeric properties @@ -341,6 +346,10 @@ public final class ReaderConfig PROP_MAX_ENTITY_DEPTH); sProperties.put(WstxInputProperties.P_MAX_ENTITY_COUNT, PROP_MAX_ENTITY_COUNT); + // since 5.4/6.4 + sProperties.put(WstxInputProperties.P_MAX_DTD_DEPTH, + PROP_MAX_DTD_DEPTH); + sProperties.put(WstxInputProperties.P_MAX_CHARACTERS, PROP_MAX_CHARACTERS); sProperties.put(WstxInputProperties.P_CUSTOM_INTERNAL_ENTITIES, Integer.valueOf(PROP_CUSTOM_INTERNAL_ENTITIES)); @@ -400,6 +409,9 @@ public final class ReaderConfig protected int mMaxEntityDepth = DEFAULT_MAX_ENTITY_DEPTH; protected long mMaxEntityCount = DEFAULT_MAX_ENTITY_COUNT; + + // since 5.4/6.4 + protected int mMaxDtdDepth = DEFAULT_MAX_DTD_DEPTH; /** * Base URL to use as the resolution context for relative entity @@ -506,6 +518,7 @@ private ReaderConfig(ReaderConfig base, mMaxTextLength = base.mMaxTextLength; mMaxEntityDepth = base.mMaxEntityDepth; mMaxEntityCount = base.mMaxEntityCount; + mMaxDtdDepth = base.mMaxDtdDepth; } /* Ok, let's then see if we can find a buffer recycler. Since they @@ -569,6 +582,7 @@ public ReaderConfig createNonShared(SymbolTable sym) rc.mMaxElementDepth = mMaxElementDepth; rc.mMaxEntityDepth = mMaxEntityDepth; rc.mMaxEntityCount = mMaxEntityCount; + rc.mMaxDtdDepth = mMaxDtdDepth; if (mSpecialProperties != null) { int len = mSpecialProperties.length; Object[] specProps = new Object[len]; @@ -735,6 +749,8 @@ public boolean willAllowXml11EscapedCharsInXml10() { public int getMaxEntityDepth() { return mMaxEntityDepth; } public long getMaxEntityCount() { return mMaxEntityCount; } + public int getMaxDtdDepth() { return mMaxDtdDepth; } + public long getMaxCharacters() { return mMaxCharacters; } public long getMaxTextLength() { return mMaxTextLength; } @@ -988,6 +1004,10 @@ public void setMaxEntityDepth(int value) { public void setMaxEntityCount(long value) { mMaxEntityCount = value; } + // @since 5.4/6.4 + public void setMaxDtdDepth(int value) { + mMaxDtdDepth = value; + } public void setCustomInternalEntities(Map m) { @@ -1486,6 +1506,8 @@ public Object getProperty(int id) return getMaxEntityDepth(); case PROP_MAX_ENTITY_COUNT: return getMaxEntityCount(); + case PROP_MAX_DTD_DEPTH: + return getMaxDtdDepth(); case PROP_MIN_TEXT_SEGMENT: return getShortestReportedTextSegment(); @@ -1674,6 +1696,9 @@ public boolean setProperty(String propName, int id, Object value) case PROP_MAX_ENTITY_COUNT: setMaxEntityCount(ArgUtil.convertToLong(propName, value, 1)); break; + case PROP_MAX_DTD_DEPTH: + setMaxDtdDepth(ArgUtil.convertToInt(propName, value, 1)); + break; case PROP_MIN_TEXT_SEGMENT: setShortestReportedTextSegment(ArgUtil.convertToInt(propName, value, 1)); diff --git a/src/main/java/com/ctc/wstx/api/WstxInputProperties.java b/src/main/java/com/ctc/wstx/api/WstxInputProperties.java index 5426b0ee..840e7ddd 100644 --- a/src/main/java/com/ctc/wstx/api/WstxInputProperties.java +++ b/src/main/java/com/ctc/wstx/api/WstxInputProperties.java @@ -184,7 +184,6 @@ public final class WstxInputProperties // // // Constraints on sizes of text segments parsed: - /** * Property to specify shortest non-complete text segment (part of * CDATA section or text content) that parser is allowed to return, @@ -252,6 +251,15 @@ public final class WstxInputProperties */ public final static String P_MAX_ENTITY_DEPTH = "com.ctc.wstx.maxEntityDepth"; + // and yet more size constraints (4.3+) + + /** + * Maximum level of nesting of XML elements, starting with root element. + * + * @since 5.4 / 6.4 + */ + public final static String P_MAX_DTD_DEPTH = "com.ctc.wstx.maxDtdDepth"; + // // // Entity handling /** diff --git a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java index 19f7ab8e..ce1b69d2 100644 --- a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java +++ b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java @@ -74,9 +74,6 @@ public class FullDTDReader final static Boolean ENTITY_EXP_PE = Boolean.TRUE; - final static int DEFAULT_DTD_RECURSION_DEPTH_LIMIT = 500; - static int DTD_RECURSION_DEPTH_LIMIT = DEFAULT_DTD_RECURSION_DEPTH_LIMIT; - /* /////////////////////////////////////////////////////////// // Configuration @@ -330,24 +327,6 @@ public class FullDTDReader transient TextBuffer mTextBuffer = null; - /** - * Sets the limit on how many times the code will recurse through DTD data. - * The default is 500. - * @param limit new limit on how many times the code will recurse through DTD data - */ - public static void setDtdRecursionDepthLimit(final int limit) { - DTD_RECURSION_DEPTH_LIMIT = limit; - } - - /** - * Gets the limit on how many times the code will recurse through DTD data. - * The default is 500. - * @return limit on how many times the code will recurse through DTD data - */ - public static int getDtdRecursionDepthLimit() { - return DTD_RECURSION_DEPTH_LIMIT; - } - /* /////////////////////////////////////////////////////////// // Life-cycle @@ -3070,12 +3049,13 @@ private StructValidator readMixedSpec(PrefixedName elemName, boolean construct) return val; } - private ContentSpec readContentSpec(final PrefixedName elemName, final boolean construct, final int recursionDepth) + private ContentSpec readContentSpec(final PrefixedName elemName, final boolean construct, + final int recursionDepth) throws XMLStreamException { - if (recursionDepth > DTD_RECURSION_DEPTH_LIMIT) { - throw new XMLStreamException("FullDTDReader has reached recursion depth limit of " + DTD_RECURSION_DEPTH_LIMIT); - } + verifyLimit("Maximum DTD nesting depth (WstxInputProperties.P_MAX_DTD_DEPTH)", + mConfig.getMaxDtdDepth(), + recursionDepth); ArrayList subSpecs = new ArrayList(); boolean isChoice = false; // default to sequence diff --git a/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java index d784125e..389d4d01 100644 --- a/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java +++ b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java @@ -1,16 +1,17 @@ package wstxtest.fuzz; -import com.ctc.wstx.dtd.FullDTDReader; +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.io.Reader; + +import javax.xml.stream.XMLStreamReader; + +import com.ctc.wstx.api.WstxInputProperties; import com.ctc.wstx.exc.WstxLazyException; import com.ctc.wstx.stax.WstxInputFactory; import org.codehaus.stax2.io.Stax2ByteArraySource; import wstxtest.stream.BaseStreamTest; -import javax.xml.stream.XMLStreamReader; -import java.io.ByteArrayInputStream; -import java.io.InputStreamReader; -import java.io.Reader; - public class Fuzz_DTDReadTest extends BaseStreamTest { private final byte[] DOC = readResource("/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt"); @@ -24,23 +25,24 @@ public void testIssueInputStream() throws Exception streamThrough(sr); fail("Should not pass"); } catch (WstxLazyException e) { - verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + verifyException(e, "Maximum DTD nesting depth"); + verifyException(e, "500"); } sr.close(); } public void testIssueInputStreamHigherRecursionLimit() throws Exception { - final int defaultLimit = FullDTDReader.getDtdRecursionDepthLimit(); - XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC)); + final WstxInputFactory staxF = getWstxInputFactory(); + staxF.setProperty(WstxInputProperties.P_MAX_DTD_DEPTH, 1100); + + XMLStreamReader sr = staxF.createXMLStreamReader(new ByteArrayInputStream(DOC)); try { - FullDTDReader.setDtdRecursionDepthLimit(1000); streamThrough(sr); fail("Should not pass"); } catch (WstxLazyException e) { - verifyException(e, "FullDTDReader has reached recursion depth limit of 1000"); - } finally { - FullDTDReader.setDtdRecursionDepthLimit(defaultLimit); + verifyException(e, "Maximum DTD nesting depth"); + verifyException(e, "1100"); } sr.close(); } @@ -54,7 +56,8 @@ public void testIssueReader() throws Exception streamThrough(sr); fail("Should not pass"); } catch (WstxLazyException e) { - verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + verifyException(e, "Maximum DTD nesting depth"); + verifyException(e, "500"); } sr.close(); } @@ -68,7 +71,8 @@ public void testIssueStax2ByteArray() throws Exception streamThrough(sr); fail("Should not pass"); } catch (WstxLazyException e) { - verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + verifyException(e, "Maximum DTD nesting depth"); + verifyException(e, "500"); } sr.close(); }