Skip to content

Commit

Permalink
Offer suggestions for lift manifest typos. (#51)
Browse files Browse the repository at this point in the history
Also make the array indexing basis clear when array indexes are used.
  • Loading branch information
jsirois authored Sep 29, 2023
1 parent 8ca0c07 commit 79bb1cb
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 35 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Release Notes

## 0.3.0

The unrecognized lift manifest configuration data error message now includes suggestions when the
unrecognized configuration is likely a typo.

## 0.2.2

Unrecognized lift manifest configuration data now generates an informative error instead of
Expand Down
2 changes: 1 addition & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
### Version Bump and Changelog

1. Bump the version in at [`science/__init__.py`](science/__init__.py).
2. Run `nox && nox -epackage` as a sanity check on the state of the project.
2. Run `nox && nox -e linkcheck package` as a sanity check on the state of the project.
3. Update [`CHANGES.md`](CHANGES.md) with any changes that are likely to be useful to consumers.
4. Open a PR with these changes and land it on https://github.com/a-scie/lift main.

Expand Down
2 changes: 1 addition & 1 deletion science/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

from packaging.version import Version

__version__ = "0.2.2"
__version__ = "0.3.0"

VERSION = Version(__version__)
107 changes: 91 additions & 16 deletions science/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,23 @@

from __future__ import annotations

import dataclasses
import difflib
import tomllib
from collections import OrderedDict
from collections import defaultdict
from collections.abc import Iterator, Mapping
from dataclasses import dataclass
from functools import cache
from io import BytesIO
from pathlib import Path
from textwrap import dedent
from typing import BinaryIO
from typing import BinaryIO, Generic, TypeVar

from science.build_info import BuildInfo
from science.data import Data
from science.data import Accessor, Data
from science.dataclass import Dataclass
from science.dataclass.deserializer import parse as parse_dataclass
from science.dataclass.reflect import FieldInfo, dataclass_info
from science.doc import DOC_SITE_URL
from science.errors import InputError
from science.frozendict import FrozenDict
Expand Down Expand Up @@ -74,20 +79,73 @@ class InterpreterGroupFields(Dataclass):
members: tuple[str, ...]


def _iter_field_info(datatype: type[Dataclass]) -> Iterator[FieldInfo]:
for field_info in dataclass_info(datatype).field_info:
if not field_info.hidden and not field_info.inline:
yield field_info
elif field_info.inline and (dt := field_info.type.dataclass):
yield from _iter_field_info(dt)


_D = TypeVar("_D", bound=Dataclass)


@dataclass(frozen=True)
class ValidConfig(Generic[_D]):
@classmethod
@cache
def gather(cls, datatype: type[_D]) -> ValidConfig:
return cls(
datatype=datatype,
fields=FrozenDict(
{field_info.name: field_info for field_info in _iter_field_info(datatype)}
),
)

datatype: type[_D]
fields: FrozenDict[str, FieldInfo]

def access(self, field_name: str) -> ValidConfig | None:
field_info = self.fields.get(field_name)
if not field_info:
return None

if datatype := field_info.type.dataclass:
return ValidConfig.gather(datatype)

if field_info.type.has_item_type and dataclasses.is_dataclass(
item_type := field_info.type.item_type
):
return ValidConfig.gather(item_type)

return None


def gather_unrecognized_application_config(
lift: Data, index_start: int
) -> Mapping[Accessor, ValidConfig | None]:
valid_config_by_unused_accessor: dict[Accessor, ValidConfig | None] = {}
for unused_accessor, _ in lift.iter_unused_items(index_start=index_start):
valid_config: ValidConfig | None = ValidConfig.gather(Application)
for accessor in unused_accessor.iter_lineage():
if not valid_config:
break
valid_config = valid_config.access(accessor.key)
valid_config_by_unused_accessor[unused_accessor] = valid_config
return valid_config_by_unused_accessor


def parse_config_data(data: Data) -> Application:
lift = data.get_data("lift")

interpreters_by_id = OrderedDict(
interpreters_by_id = {
(
(
interp := parse_dataclass(
interpreter, Interpreter, custom_parsers={Provider: parse_provider}
)
).id,
interp,
)
interp := parse_dataclass(
interpreter, Interpreter, custom_parsers={Provider: parse_provider}
)
).id: interp
for interpreter in lift.get_data_list("interpreters", default=[])
)
}

def parse_interpreter_group(ig_data: Data) -> InterpreterGroup:
fields = parse_dataclass(ig_data, InterpreterGroupFields)
Expand All @@ -111,20 +169,37 @@ def parse_interpreter_group(ig_data: Data) -> InterpreterGroup:
custom_parsers={BuildInfo: parse_build_info, InterpreterGroup: parse_interpreter_group},
)

unused_items = list(lift.iter_unused_items())
if unused_items:
unrecognized_config = gather_unrecognized_application_config(lift, index_start=1)
if unrecognized_config:
unrecognized_field_info = defaultdict[str, list[str]](list)
index_used = False
for accessor, valid_config in unrecognized_config.items():
index_used |= accessor.path_includes_index()
suggestions = unrecognized_field_info[accessor.render()]
if valid_config:
suggestions.extend(difflib.get_close_matches(accessor.key, valid_config.fields))

field_column_width = max(map(len, unrecognized_field_info))
unrecognized_fields = []
for name, suggestions in unrecognized_field_info.items():
line = name.rjust(field_column_width)
if suggestions:
line += f": Did you mean {' or '.join(suggestions)}?"
unrecognized_fields.append(line)

raise InputError(
dedent(
"""\
The following `lift` manifest entries in {manifest_source} were not recognized:
The following `lift` manifest entries in {manifest_source} were not recognized{index_parenthetical}:
{unrecognized_fields}
Refer to the lift manifest format specification at {doc_url} or by running `science doc open manifest`.
"""
)
.format(
manifest_source=data.provenance.source,
unrecognized_fields="\n".join(key for key, _ in unused_items),
index_parenthetical=" (indexes are 1-based)" if index_used else "",
unrecognized_fields="\n".join(unrecognized_fields),
doc_url=f"{DOC_SITE_URL}/manifest.html",
)
.strip()
Expand Down
56 changes: 49 additions & 7 deletions science/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,40 @@
from science.frozendict import FrozenDict
from science.hashing import Provenance


@dataclass(frozen=True)
class Accessor:
key: str
parent: Accessor | None = None
_index: int | None = None

def index(self, index: int) -> Accessor:
return dataclasses.replace(self, _index=index)

def render(self) -> str:
if self.parent:
rendered = f"{self.parent.render()}.{self.key}"
else:
rendered = self.key

if self._index is not None:
rendered += f"[{self._index}]"
return rendered

def iter_lineage(self, include_self: bool = False) -> Iterator[Accessor]:
if self.parent:
yield from self.parent.iter_lineage(include_self=True)
if include_self:
yield self

def path_includes_index(self) -> bool:
if self._index is not None:
return True
if self.parent and self.parent.path_includes_index():
return True
return False


_T = TypeVar("_T")


Expand Down Expand Up @@ -150,17 +184,25 @@ def get_value(
self._unused_data.pop(key, None)
return value

def iter_unused_items(self) -> Iterator[tuple[str, Any]]:
def iter_unused_items(
self, parent: Accessor | None = None, index_start: int = 0
) -> Iterator[tuple[Accessor, Any]]:
for key, value in self._unused_data.items():
accessor = Accessor(key, parent=parent)
if isinstance(value, list) and all(isinstance(item, Data) for item in value):
for index, item in enumerate(value, start=1):
for sub_key, sub_value in item.iter_unused_items():
yield f"{key}[{index}].{sub_key}", sub_value
for index, item in enumerate(value, start=index_start):
accessor = accessor.index(index)
for sub_accessor, sub_value in item.iter_unused_items(
parent=accessor, index_start=index_start
):
yield sub_accessor, sub_value
elif isinstance(value, Data):
for sub_key, sub_value in value.iter_unused_items():
yield f"{key}.{sub_key}", sub_value
for sub_accessor, sub_value in value.iter_unused_items(
parent=accessor, index_start=index_start
):
yield sub_accessor, sub_value
else:
yield key, value
yield accessor, value

def __bool__(self):
return bool(self.data)
4 changes: 2 additions & 2 deletions tests/data/unrecognized-config-fields.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ members = [
]

[[lift.commands]]
# N.B: `environ` is invalid.
environ = { key = "value" }
# N.B: `just_wrong` is invalid.
just_wrong = { key = "value" }
exe = "#{cpython:python}"
args = [
"-c",
Expand Down
16 changes: 8 additions & 8 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,14 @@ def test_unrecognized_config_fields(tmp_path: Path, science_pyz: Path) -> None:
assert (
dedent(
f"""\
The following `lift` manifest entries in {config} were not recognized:
scie-jump
scie_jump.version2
interpreters[2].lizzy
commands[1].environ
commands[1].env.remove_re2
commands[1].env.replace2
app-info
The following `lift` manifest entries in {config} were not recognized (indexes are 1-based):
scie-jump: Did you mean scie_jump?
scie_jump.version2: Did you mean version?
interpreters[2].lizzy: Did you mean lazy?
commands[1].just_wrong
commands[1].env.remove_re2: Did you mean remove_re or remove_exact?
commands[1].env.replace2: Did you mean replace?
app-info: Did you mean app_info?
Refer to the lift manifest format specification at https://science.scie.app/manifest.html or by running `science doc open manifest`.
"""
Expand Down

0 comments on commit 79bb1cb

Please sign in to comment.