Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

And upgrade torch, numpy, pandas and accelerate as well as black, pyr… #100

Merged
Merged 2 commits on Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
max-line-length = 120
max-line-length = 125
extend-ignore = E203
per-file-ignores = **/*.pyi:E252,E301,E302,E305,E501,E701,E704,E741,F401,F811,F821
exclude =
Expand Down
12 changes: 4 additions & 8 deletions machine/annotations/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,21 @@ def __repr__(self) -> str:
class _RangeFactory(ABC, Generic[Offset]):
@property
@abstractmethod
def include_endpoint(self) -> bool:
...
def include_endpoint(self) -> bool: ...

def create(self, start: Offset, end: Optional[Offset]) -> Range[Offset]:
if end is None:
end = start
return Range(self, start, end)

@abstractmethod
def get_length(self, start: Offset, end: Offset) -> int:
...
def get_length(self, start: Offset, end: Offset) -> int: ...

@abstractmethod
def iterate(self, start: Offset, end: Offset) -> Iterable[Offset]:
...
def iterate(self, start: Offset, end: Offset) -> Iterable[Offset]: ...

@abstractmethod
def offset_compare(self, x: Offset, y: Offset) -> int:
...
def offset_compare(self, x: Offset, y: Offset) -> int: ...


class _IntRangeFactory(_RangeFactory[int]):
Expand Down
6 changes: 2 additions & 4 deletions machine/clusterers/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@

class Cluster(Generic[T]):
@overload
def __init__(self, *data_objects: T, noise: bool = False, description: Optional[str] = None) -> None:
...
def __init__(self, *data_objects: T, noise: bool = False, description: Optional[str] = None) -> None: ...

@overload
def __init__(self, data_objects: Iterable[T], noise: bool = False, description: Optional[str] = None) -> None:
...
def __init__(self, data_objects: Iterable[T], noise: bool = False, description: Optional[str] = None) -> None: ...

def __init__(self, *args, **kwargs) -> None:
self._data_objects: FrozenSet[T]
Expand Down
3 changes: 1 addition & 2 deletions machine/clusterers/flat_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@

class FlatClusterer(ABC, Generic[T]):
@abstractmethod
def generate_clusters(self, data_objects: Iterable[T]) -> Iterable[Cluster[T]]:
...
def generate_clusters(self, data_objects: Iterable[T]) -> Iterable[Cluster[T]]: ...
3 changes: 1 addition & 2 deletions machine/clusterers/rooted_hierarchical_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@

class RootedHierarchicalClusterer(ABC, Generic[T]):
@abstractmethod
def generate_clusters(self, data_objects: Iterable[T]) -> DiGraph[Cluster[T]]:
...
def generate_clusters(self, data_objects: Iterable[T]) -> DiGraph[Cluster[T]]: ...
3 changes: 1 addition & 2 deletions machine/clusterers/unrooted_hierarchical_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@

class UnrootedHierarchicalClusterer(ABC, Generic[T]):
@abstractmethod
def generate_clusters(self, data_objects: Iterable[T]) -> Graph[Cluster[T]]:
...
def generate_clusters(self, data_objects: Iterable[T]) -> Graph[Cluster[T]]: ...
3 changes: 2 additions & 1 deletion machine/clusterers/upgma_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,6 @@ def get_all_data_objects_count(tree: DiGraph[Cluster[T]], cluster: Cluster[T]) -
if tree.out_degree(cluster) == 0:
return len(cluster.data_objects)
return sum(
(get_all_data_objects_count(tree, edge[1]) for edge in tree.out_edges(cluster)), len(cluster.data_objects)
(get_all_data_objects_count(tree, edge[1]) for edge in tree.out_edges(cluster)),
len(cluster.data_objects),
)
6 changes: 2 additions & 4 deletions machine/corpora/alignment_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
class AlignmentCollection(Corpus[AlignmentRow]):
@property
@abstractmethod
def id(self) -> str:
...
def id(self) -> str: ...

@property
@abstractmethod
def sort_key(self) -> str:
...
def sort_key(self) -> str: ...
3 changes: 1 addition & 2 deletions machine/corpora/alignment_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
class AlignmentCorpus(Corpus[AlignmentRow]):
@property
@abstractmethod
def alignment_collections(self) -> Iterable[AlignmentCollection]:
...
def alignment_collections(self) -> Iterable[AlignmentCollection]: ...

def get_rows(self, text_ids: Optional[Iterable[str]] = None) -> ContextManagedGenerator[AlignmentRow, None, None]:
return ContextManagedGenerator(self._get_rows(text_ids))
Expand Down
9 changes: 6 additions & 3 deletions machine/corpora/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def get_rows(self) -> ContextManagedGenerator[Row, None, None]:
return ContextManagedGenerator(self._get_rows())

@abstractmethod
def _get_rows(self) -> Generator[Row, None, None]:
...
def _get_rows(self) -> Generator[Row, None, None]: ...

def __iter__(self) -> ContextManagedGenerator[Row, None, None]:
return self.get_rows()
Expand All @@ -28,7 +27,11 @@ def count(self, include_empty: bool = True) -> int:
return sum(1 for row in rows if include_empty or not row.is_empty)

def interleaved_split(
self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
self,
percent: Optional[float] = None,
size: Optional[int] = None,
include_empty: bool = True,
seed: Any = None,
) -> Tuple[ContextManagedGenerator[Tuple[Row, bool], None, None], int, int]:
corpus_size = self.count(include_empty)
split_indices = get_split_indices(corpus_size, percent, size, seed)
Expand Down
6 changes: 2 additions & 4 deletions machine/corpora/dictionary_alignment_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@

class DictionaryAlignmentCorpus(AlignmentCorpus):
@overload
def __init__(self, *alignment_collections: AlignmentCollection) -> None:
...
def __init__(self, *alignment_collections: AlignmentCollection) -> None: ...

@overload
def __init__(self, alignment_collections: Iterable[AlignmentCollection]) -> None:
...
def __init__(self, alignment_collections: Iterable[AlignmentCollection]) -> None: ...

def __init__(self, *args, **kwargs) -> None:
alignment_collections: Iterable[AlignmentCollection]
Expand Down
6 changes: 2 additions & 4 deletions machine/corpora/dictionary_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@

class DictionaryTextCorpus(TextCorpus):
@overload
def __init__(self, *texts: Text) -> None:
...
def __init__(self, *texts: Text) -> None: ...

@overload
def __init__(self, texts: Iterable[Text]) -> None:
...
def __init__(self, texts: Iterable[Text]) -> None: ...

def __init__(self, *args, **kwargs) -> None:
texts: Iterable[Text]
Expand Down
6 changes: 2 additions & 4 deletions machine/corpora/file_stream_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ def __init__(self, filename: StrPath) -> None:
def __enter__(self) -> FileStreamContainer:
return self

def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
...
def __exit__(self, type: Any, value: Any, traceback: Any) -> None: ...

def open_stream(self) -> BinaryIO:
return open(self._filename, "rb")

def close(self) -> None:
...
def close(self) -> None: ...
11 changes: 4 additions & 7 deletions machine/corpora/flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,15 @@


@overload
def flatten(corpora: Iterable[TextCorpus]) -> TextCorpus:
...
def flatten(corpora: Iterable[TextCorpus]) -> TextCorpus: ...


@overload
def flatten(corpora: Iterable[AlignmentCorpus]) -> AlignmentCorpus:
...
def flatten(corpora: Iterable[AlignmentCorpus]) -> AlignmentCorpus: ...


@overload
def flatten(corpora: Iterable[ParallelTextCorpus]) -> ParallelTextCorpus:
...
def flatten(corpora: Iterable[ParallelTextCorpus]) -> ParallelTextCorpus: ...


def flatten(corpora: Iterable[Corpus]) -> Corpus:
Expand All @@ -35,7 +32,7 @@ def flatten(corpora: Iterable[Corpus]) -> Corpus:
if len(corpus_list) == 1:
return corpus_list[0]

if any(type(corpus_list[0]) != type(corpus) for corpus in corpus_list[1:]):
if any(type(corpus_list[0]) != type(corpus) for corpus in corpus_list[1:]): # noqa: E721
raise TypeError("All corpora must be of the same type.")

if isinstance(corpus_list[0], TextCorpus):
Expand Down
26 changes: 19 additions & 7 deletions machine/corpora/parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,11 @@ def from_hf_dataset(

@property
@abstractmethod
def is_source_tokenized(self) -> bool:
...
def is_source_tokenized(self) -> bool: ...

@property
@abstractmethod
def is_target_tokenized(self) -> bool:
...
def is_target_tokenized(self) -> bool: ...

def invert(self) -> ParallelTextCorpus:
def _invert(row: ParallelTextRow) -> ParallelTextRow:
Expand Down Expand Up @@ -304,7 +302,11 @@ def take(self, count: int) -> ParallelTextCorpus:
return _TakeParallelTextCorpus(self, count)

def split(
self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
self,
percent: Optional[float] = None,
size: Optional[int] = None,
include_empty: bool = True,
seed: Any = None,
) -> Tuple[ParallelTextCorpus, ParallelTextCorpus, int, int]:
corpus_size = self.count(include_empty)
split_indices = get_split_indices(corpus_size, percent, size, seed)
Expand Down Expand Up @@ -594,7 +596,12 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
AlignedWordPair.from_string(v) if isinstance(v, str) else [AlignedWordPair(t[0], t[1]) for t in v]
)
yield ParallelTextRow(
text_id, refs, refs, [source] if len(source) > 0 else [], [target] if len(target) > 0 else [], alignment
text_id,
refs,
refs,
[source] if len(source) > 0 else [],
[target] if len(target) > 0 else [],
alignment,
)


Expand Down Expand Up @@ -670,7 +677,12 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
alignment = [AlignedWordPair(si, ti) for (si, ti) in zip(src_indices, trg_indices)]

yield ParallelTextRow(
text_id, refs, refs, [source] if len(source) > 0 else [], [target] if len(target) > 0 else [], alignment
text_id,
refs,
refs,
[source] if len(source) > 0 else [],
[target] if len(target) > 0 else [],
alignment,
)
index += 1

Expand Down
4 changes: 3 additions & 1 deletion machine/corpora/scripture_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def _get_rows(self) -> Generator[TextRow, None, None]:
yield from self._create_rows(vref)


def create_versification_ref_corpus(versification: Versification = ORIGINAL_VERSIFICATION) -> ScriptureTextCorpus:
def create_versification_ref_corpus(
versification: Versification = ORIGINAL_VERSIFICATION,
) -> ScriptureTextCorpus:
return ScriptureTextCorpus(
versification,
(
Expand Down
8 changes: 5 additions & 3 deletions machine/corpora/standard_parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,11 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
range_info,
src_row,
trg_row,
alignment.aligned_word_pairs
if alignment is not None and src_row.ref == alignment.ref
else None,
(
alignment.aligned_word_pairs
if alignment is not None and src_row.ref == alignment.ref
else None
),
)

source_same_ref_rows.append(src_row)
Expand Down
12 changes: 4 additions & 8 deletions machine/corpora/stream_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,13 @@

class StreamContainer(ABC):
@abstractmethod
def __enter__(self) -> StreamContainer:
...
def __enter__(self) -> StreamContainer: ...

@abstractmethod
def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
...
def __exit__(self, type: Any, value: Any, traceback: Any) -> None: ...

@abstractmethod
def open_stream(self) -> BinaryIO:
...
def open_stream(self) -> BinaryIO: ...

@abstractmethod
def close(self) -> None:
...
def close(self) -> None: ...
6 changes: 2 additions & 4 deletions machine/corpora/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
class Text(Corpus[TextRow]):
@property
@abstractmethod
def id(self) -> str:
...
def id(self) -> str: ...

@property
@abstractmethod
def sort_key(self) -> str:
...
def sort_key(self) -> str: ...
12 changes: 7 additions & 5 deletions machine/corpora/text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@
class TextCorpus(Corpus[TextRow]):
@property
@abstractmethod
def texts(self) -> Iterable[Text]:
...
def texts(self) -> Iterable[Text]: ...

@property
@abstractmethod
def is_tokenized(self) -> bool:
...
def is_tokenized(self) -> bool: ...

def get_rows(self, text_ids: Optional[Iterable[str]] = None) -> ContextManagedGenerator[TextRow, None, None]:
return ContextManagedGenerator(self._get_rows(text_ids))
Expand Down Expand Up @@ -132,7 +130,11 @@ def take(self, count: int) -> TextCorpus:
return _TakeTextCorpus(self, count)

def split(
self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
self,
percent: Optional[float] = None,
size: Optional[int] = None,
include_empty: bool = True,
seed: Any = None,
) -> Tuple[TextCorpus, TextCorpus, int, int]:
corpus_size = self.count(include_empty)
split_indices = get_split_indices(corpus_size, percent, size, seed)
Expand Down
6 changes: 2 additions & 4 deletions machine/corpora/text_file_alignment_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@

class TextFileAlignmentCorpus(DictionaryAlignmentCorpus):
@overload
def __init__(self, file_patterns: Iterable[StrPath]) -> None:
...
def __init__(self, file_patterns: Iterable[StrPath]) -> None: ...

@overload
def __init__(self, *file_patterns: StrPath) -> None:
...
def __init__(self, *file_patterns: StrPath) -> None: ...

def __init__(self, *args, **kwargs) -> None:
file_patterns: Iterable[str]
Expand Down
6 changes: 2 additions & 4 deletions machine/corpora/text_file_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@

class TextFileTextCorpus(DictionaryTextCorpus):
@overload
def __init__(self, file_patterns: Iterable[StrPath]) -> None:
...
def __init__(self, file_patterns: Iterable[StrPath]) -> None: ...

@overload
def __init__(self, *file_patterns: StrPath) -> None:
...
def __init__(self, *file_patterns: StrPath) -> None: ...

def __init__(self, *args, **kwargs) -> None:
file_patterns: Iterable[str]
Expand Down
6 changes: 5 additions & 1 deletion machine/corpora/text_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class TextRowFlags(Flag):

class TextRow(Sequence[str]):
def __init__(
self, text_id: str, ref: Any, segment: Sequence[str] = [], flags: TextRowFlags = TextRowFlags.SENTENCE_START
self,
text_id: str,
ref: Any,
segment: Sequence[str] = [],
flags: TextRowFlags = TextRowFlags.SENTENCE_START,
) -> None:
self._text_id = text_id
self._ref = ref
Expand Down
Loading
Loading