Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-117348: Refactored RawConfigParser._read for simplicity and comprehensibility #117372

Merged
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a25ac00
Extract method for _read_inner, reducing complexity and indentation by 1.
jaraco Mar 27, 2024
1e69aae
Extract method for _raise_all and yield ParseErrors from _read_inner.
jaraco Mar 27, 2024
8c47781
Prefer iterators to splat expansion and literal indexing.
jaraco Mar 27, 2024
7479814
Extract method for _strip_comments. Reduces complexity by 7.
jaraco Mar 27, 2024
a2fffee
Model the file lines in a class to encapsulate the comment status and…
jaraco Mar 28, 2024
23468cb
Encapsulate the read state as a dataclass
jaraco Mar 28, 2024
3d1ef0a
Extract _handle_continuation_line and _handle_rest methods. Reduces c…
jaraco Mar 28, 2024
071baeb
Reindent
jaraco Mar 28, 2024
81f4ce2
At least for now, collect errors in the ReadState
jaraco Mar 28, 2024
8942cc1
Check for missing section header separately.
jaraco Mar 28, 2024
1e72168
Extract methods for _handle_header and _handle_option. Reduces comple…
jaraco Mar 28, 2024
0dfd797
Remove unreachable code. Reduces complexity by 4.
jaraco Mar 28, 2024
77ed897
Remove unreachable branch
jaraco Mar 28, 2024
76f42d3
Handle error condition early. Reduces complexity by 1.
jaraco Mar 28, 2024
c18a2bb
Add blurb
jaraco Mar 29, 2024
97aa785
Move _raise_all to ParsingError, as its behavior is most closely rela…
jaraco Mar 29, 2024
d310cb4
Split _strip* into separate methods.
jaraco Mar 29, 2024
f2a355c
Refactor _strip_full to compute the strip just once and use 'not any'…
jaraco Mar 29, 2024
2492614
Replace use of 'sys.maxsize' with direct computation of the stripped …
jaraco Mar 29, 2024
4968591
Extract has_comments as a dynamic property.
jaraco Mar 29, 2024
7d807bb
Implement clean as a cached property.
jaraco Mar 29, 2024
29cb20f
Model comment prefixes in the RawConfigParser within a prefixes names…
jaraco Mar 29, 2024
c834c35
Use a regular expression to search for the first match.
jaraco Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Extract method for _read_inner, reducing complexity and indentation by 1.
  • Loading branch information
jaraco committed Mar 29, 2024
commit a25ac002617c9756cd0f4ff8f0d0692134006592
202 changes: 104 additions & 98 deletions Lib/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,13 @@ def _read(self, fp, fpname):
in an otherwise empty line or may be entered in lines holding values or
section names. Please note that comments get stripped off when reading configuration files.
"""

try:
self._read_inner(fp, fpname)
finally:
self._join_multiline_values()

def _read_inner(self, fp, fpname):
elements_added = set()
cursect = None # None, or a dictionary
sectname = None
Expand All @@ -983,59 +990,83 @@ def _read(self, fp, fpname):
indent_level = 0
e = None # None, or an exception

try:
for lineno, line in enumerate(fp, start=1):
comment_start = sys.maxsize
# strip inline comments
inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
while comment_start == sys.maxsize and inline_prefixes:
next_prefixes = {}
for prefix, index in inline_prefixes.items():
index = line.find(prefix, index+1)
if index == -1:
continue
next_prefixes[prefix] = index
if index == 0 or (index > 0 and line[index-1].isspace()):
comment_start = min(comment_start, index)
inline_prefixes = next_prefixes
# strip full line comments
for prefix in self._comment_prefixes:
if line.strip().startswith(prefix):
comment_start = 0
break
if comment_start == sys.maxsize:
comment_start = None
value = line[:comment_start].strip()
if not value:
if self._empty_lines_in_values:
# add empty line to the value, but only if there was no
# comment on the line
if (comment_start is None and
cursect is not None and
optname and
cursect[optname] is not None):
cursect[optname].append('') # newlines added at join
else:
# empty line marks end of value
indent_level = sys.maxsize
continue
# continuation line?
first_nonspace = self.NONSPACECRE.search(line)
cur_indent_level = first_nonspace.start() if first_nonspace else 0
if (cursect is not None and optname and
cur_indent_level > indent_level):
if cursect[optname] is None:
raise MultilineContinuationError(fpname, lineno, line)
cursect[optname].append(value)
# a section header or option header?
for lineno, line in enumerate(fp, start=1):
comment_start = sys.maxsize
# strip inline comments
inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
while comment_start == sys.maxsize and inline_prefixes:
next_prefixes = {}
for prefix, index in inline_prefixes.items():
index = line.find(prefix, index+1)
if index == -1:
continue
next_prefixes[prefix] = index
if index == 0 or (index > 0 and line[index-1].isspace()):
comment_start = min(comment_start, index)
inline_prefixes = next_prefixes
# strip full line comments
for prefix in self._comment_prefixes:
if line.strip().startswith(prefix):
comment_start = 0
break
if comment_start == sys.maxsize:
comment_start = None
value = line[:comment_start].strip()
if not value:
if self._empty_lines_in_values:
# add empty line to the value, but only if there was no
# comment on the line
if (comment_start is None and
cursect is not None and
optname and
cursect[optname] is not None):
cursect[optname].append('') # newlines added at join
else:
if self._allow_unnamed_section and cursect is None:
sectname = UNNAMED_SECTION
# empty line marks end of value
indent_level = sys.maxsize
continue
# continuation line?
first_nonspace = self.NONSPACECRE.search(line)
cur_indent_level = first_nonspace.start() if first_nonspace else 0
if (cursect is not None and optname and
cur_indent_level > indent_level):
if cursect[optname] is None:
raise MultilineContinuationError(fpname, lineno, line)
cursect[optname].append(value)
# a section header or option header?
else:
if self._allow_unnamed_section and cursect is None:
sectname = UNNAMED_SECTION
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)

indent_level = cur_indent_level
# is it a section header?
mo = self.SECTCRE.match(value)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
if self._strict and sectname in elements_added:
raise DuplicateSectionError(sectname, fpname,
lineno)
cursect = self._sections[sectname]
elements_added.add(sectname)
elif sectname == self.default_section:
cursect = self._defaults
else:
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)

# So sections can't start with a continuation line
optname = None
# no section header?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
# an option line?
else:
indent_level = cur_indent_level
# is it a section header?
mo = self.SECTCRE.match(value)
Expand All @@ -1056,67 +1087,42 @@ def _read(self, fp, fpname):
elements_added.add(sectname)
# So sections can't start with a continuation line
optname = None
# no section header?
# no section header in the file?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
# an option line?
else:
indent_level = cur_indent_level
# is it a section header?
mo = self.SECTCRE.match(value)
mo = self._optcre.match(value)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
if self._strict and sectname in elements_added:
raise DuplicateSectionError(sectname, fpname,
lineno)
cursect = self._sections[sectname]
elements_added.add(sectname)
elif sectname == self.default_section:
cursect = self._defaults
optname, vi, optval = mo.group('option', 'vi', 'value')
if not optname:
e = self._handle_error(e, fpname, lineno, line)
optname = self.optionxform(optname.rstrip())
if (self._strict and
(sectname, optname) in elements_added):
raise DuplicateOptionError(sectname, optname,
fpname, lineno)
elements_added.add((sectname, optname))
# This check is fine because the OPTCRE cannot
# match if it would set optval to None
if optval is not None:
optval = optval.strip()
cursect[optname] = [optval]
else:
cursect = self._dict()
self._sections[sectname] = cursect
self._proxies[sectname] = SectionProxy(self, sectname)
elements_added.add(sectname)
# So sections can't start with a continuation line
optname = None
# no section header in the file?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
# an option line?
# valueless option handling
cursect[optname] = None
else:
mo = self._optcre.match(value)
if mo:
optname, vi, optval = mo.group('option', 'vi', 'value')
if not optname:
e = self._handle_error(e, fpname, lineno, line)
optname = self.optionxform(optname.rstrip())
if (self._strict and
(sectname, optname) in elements_added):
raise DuplicateOptionError(sectname, optname,
fpname, lineno)
elements_added.add((sectname, optname))
# This check is fine because the OPTCRE cannot
# match if it would set optval to None
if optval is not None:
optval = optval.strip()
cursect[optname] = [optval]
else:
# valueless option handling
cursect[optname] = None
else:
# a non-fatal parsing error occurred. set up the
# exception but keep going. the exception will be
# raised at the end of the file and will contain a
# list of all bogus lines
e = self._handle_error(e, fpname, lineno, line)
finally:
self._join_multiline_values()
# a non-fatal parsing error occurred. set up the
# exception but keep going. the exception will be
# raised at the end of the file and will contain a
# list of all bogus lines
e = self._handle_error(e, fpname, lineno, line)

# if any parsing errors occurred, raise an exception
if e:
raise e


def _join_multiline_values(self):
defaults = self.default_section, self._defaults
all_sections = itertools.chain((defaults,),
Expand Down