Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-117348: Refactored RawConfigParser._read for simplicity and comprehensibility #117372

Merged
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a25ac00
Extract method for _read_inner, reducing complexity and indentation b…
jaraco Mar 27, 2024
1e69aae
Extract method for _raise_all and yield ParseErrors from _read_inner.
jaraco Mar 27, 2024
8c47781
Prefer iterators to splat expansion and literal indexing.
jaraco Mar 27, 2024
7479814
Extract method for _strip_comments. Reduces complexity by 7.
jaraco Mar 27, 2024
a2fffee
Model the file lines in a class to encapsulate the comment status and…
jaraco Mar 28, 2024
23468cb
Encapsulate the read state as a dataclass
jaraco Mar 28, 2024
3d1ef0a
Extract _handle_continuation_line and _handle_rest methods. Reduces c…
jaraco Mar 28, 2024
071baeb
Reindent
jaraco Mar 28, 2024
81f4ce2
At least for now, collect errors in the ReadState
jaraco Mar 28, 2024
8942cc1
Check for missing section header separately.
jaraco Mar 28, 2024
1e72168
Extract methods for _handle_header and _handle_option. Reduces comple…
jaraco Mar 28, 2024
0dfd797
Remove unreachable code. Reduces complexity by 4.
jaraco Mar 28, 2024
77ed897
Remove unreachable branch
jaraco Mar 28, 2024
76f42d3
Handle error condition early. Reduces complexity by 1.
jaraco Mar 28, 2024
c18a2bb
Add blurb
jaraco Mar 29, 2024
97aa785
Move _raise_all to ParsingError, as its behavior is most closely rela…
jaraco Mar 29, 2024
d310cb4
Split _strip* into separate methods.
jaraco Mar 29, 2024
f2a355c
Refactor _strip_full to compute the strip just once and use 'not any'…
jaraco Mar 29, 2024
2492614
Replace use of 'sys.maxsize' with direct computation of the stripped …
jaraco Mar 29, 2024
4968591
Extract has_comments as a dynamic property.
jaraco Mar 29, 2024
7d807bb
Implement clean as a cached property.
jaraco Mar 29, 2024
29cb20f
Model comment prefixes in the RawConfigParser within a prefixes names…
jaraco Mar 29, 2024
c834c35
Use a regular expression to search for the first match.
jaraco Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Encapsulate the read state as a dataclass
  • Loading branch information
jaraco committed Mar 29, 2024
commit 23468cbaac87924417af3bce3f608289696a0bef
145 changes: 76 additions & 69 deletions Lib/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
from collections.abc import MutableMapping
from collections import ChainMap as _ChainMap
import contextlib
from dataclasses import dataclass, field
import functools
import io
import itertools
Expand Down Expand Up @@ -526,6 +527,17 @@ def _interpolate_some(self, parser, option, accum, rest, section, map,
"'$' must be followed by '$' or '{', "
"found: %r" % (rest,))


@dataclass
class _ReadState:
elements_added : set[str] = field(default_factory=set)
cursect : dict[str, str] | None = None
sectname : str | None = None
optname : str | None = None
lineno : int = 0
indent_level : int = 0


class _Line(str):
def _strip_comments(self, prefixes, inline_prefixes):
comment_start = sys.maxsize
Expand Down Expand Up @@ -1024,121 +1036,116 @@ def _raise_all(self, exceptions: Iterator[ParsingError]):
raise next(exceptions).combine(exceptions)

def _read_inner(self, fp, fpname):
elements_added = set()
cursect = None # None, or a dictionary
sectname = None
optname = None
lineno = 0
indent_level = 0
st = _ReadState()

for lineno, line in enumerate(map(_Line, fp), start=1):
for st.lineno, line in enumerate(map(_Line, fp), start=1):
line._strip_comments(self._comment_prefixes, self._inline_comment_prefixes)

if not line.clean:
if self._empty_lines_in_values:
# add empty line to the value, but only if there was no
# comment on the line
if (not line.has_comments and
cursect is not None and
optname and
cursect[optname] is not None):
cursect[optname].append('') # newlines added at join
st.cursect is not None and
st.optname and
st.cursect[st.optname] is not None):
st.cursect[st.optname].append('') # newlines added at join
else:
# empty line marks end of value
indent_level = sys.maxsize
st.indent_level = sys.maxsize
continue
# continuation line?
first_nonspace = self.NONSPACECRE.search(line)
cur_indent_level = first_nonspace.start() if first_nonspace else 0
if (cursect is not None and optname and
cur_indent_level > indent_level):
if cursect[optname] is None:
raise MultilineContinuationError(fpname, lineno, line)
cursect[optname].append(line.clean)
if (st.cursect is not None and st.optname and
cur_indent_level > st.indent_level):
if st.cursect[st.optname] is None:
raise MultilineContinuationError(fpname, st.lineno, line)
st.cursect[st.optname].append(line.clean)
# a section header or option header?
else:
if self._allow_unnamed_section and cursect is None:
sectname = UNNAMED_SECTION
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)

indent_level = cur_indent_level
if self._allow_unnamed_section and st.cursect is None:
st.sectname = UNNAMED_SECTION
st.cursect = self._dict()
self._sections[st.sectname] = st.cursect
self._proxies[st.sectname] = SectionProxy(self, st.sectname)
st.elements_added.add(st.sectname)

st.indent_level = cur_indent_level
# is it a section header?
mo = self.SECTCRE.match(line.clean)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
if self._strict and sectname in elements_added:
raise DuplicateSectionError(sectname, fpname,
lineno)
cursect = self._sections[sectname]
elements_added.add(sectname)
elif sectname == self.default_section:
cursect = self._defaults
st.sectname = mo.group('header')
if st.sectname in self._sections:
if self._strict and st.sectname in st.elements_added:
raise DuplicateSectionError(st.sectname, fpname,
st.lineno)
st.cursect = self._sections[st.sectname]
st.elements_added.add(st.sectname)
elif st.sectname == self.default_section:
st.cursect = self._defaults
else:
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)
st.cursect = self._dict()
self._sections[st.sectname] = st.cursect
self._proxies[st.sectname] = SectionProxy(self, st.sectname)
st.elements_added.add(st.sectname)
# So sections can't start with a continuation line
optname = None
st.optname = None
# no section header?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
elif st.cursect is None:
raise MissingSectionHeaderError(fpname, st.lineno, line)
# an option line?
else:
indent_level = cur_indent_level
st.indent_level = cur_indent_level
# is it a section header?
mo = self.SECTCRE.match(line.clean)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
if self._strict and sectname in elements_added:
raise DuplicateSectionError(sectname, fpname,
lineno)
cursect = self._sections[sectname]
elements_added.add(sectname)
elif sectname == self.default_section:
cursect = self._defaults
st.sectname = mo.group('header')
if st.sectname in self._sections:
if self._strict and st.sectname in st.elements_added:
raise DuplicateSectionError(st.sectname, fpname,
st.lineno)
st.cursect = self._sections[st.sectname]
st.elements_added.add(st.sectname)
elif st.sectname == self.default_section:
st.cursect = self._defaults
else:
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)
st.cursect = self._dict()
self._sections[st.sectname] = st.cursect
self._proxies[st.sectname] = SectionProxy(self, st.sectname)
st.elements_added.add(st.sectname)
# So sections can't start with a continuation line
optname = None
st.optname = None
# no section header in the file?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
elif st.cursect is None:
raise MissingSectionHeaderError(fpname, st.lineno, line)
# an option line?
else:
mo = self._optcre.match(line.clean)
if mo:
optname, vi, optval = mo.group('option', 'vi', 'value')
if not optname:
yield ParsingError(fpname, lineno, line)
optname = self.optionxform(optname.rstrip())
st.optname, vi, optval = mo.group('option', 'vi', 'value')
if not st.optname:
yield ParsingError(fpname, st.lineno, line)
st.optname = self.optionxform(st.optname.rstrip())
if (self._strict and
(sectname, optname) in elements_added):
raise DuplicateOptionError(sectname, optname,
fpname, lineno)
elements_added.add((sectname, optname))
(st.sectname, st.optname) in st.elements_added):
raise DuplicateOptionError(st.sectname, st.optname,
fpname, st.lineno)
st.elements_added.add((st.sectname, st.optname))
# This check is fine because the OPTCRE cannot
# match if it would set optval to None
if optval is not None:
optval = optval.strip()
cursect[optname] = [optval]
st.cursect[st.optname] = [optval]
else:
# valueless option handling
cursect[optname] = None
st.cursect[st.optname] = None
else:
# a non-fatal parsing error occurred. set up the
# exception but keep going. the exception will be
# raised at the end of the file and will contain a
# list of all bogus lines
yield ParsingError(fpname, lineno, line)
yield ParsingError(fpname, st.lineno, line)

def _join_multiline_values(self):
defaults = self.default_section, self._defaults
Expand Down