And upgrade torch, numpy, pandas and accelerate as well as black, pyright and flake8 (#100)

* And upgrade torch, numpy, pandas and accelerate as well as black, pyright and flake8, to fix the OutOfMemory error and to keep dependencies current.

* torch 2.1.2
johnml1135 authored Feb 3, 2024
1 parent f915db0 commit 4267780
Showing 90 changed files with 750 additions and 685 deletions.
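
Most of the changes below are mechanical reformatting from the black upgrade: black's 2024 stable style collapses function bodies that consist only of an ellipsis (...) onto the signature line, and several over-long signatures and call sites are rewrapped one argument per line with a trailing comma. The max-line-length bump in .flake8 appears to accommodate a few collapsed stub lines that now run slightly past 120 characters. A minimal sketch of the stub pattern, assuming black 24.x is the version being pinned (illustrative only, not code copied from this repository):

from abc import ABC, abstractmethod

# Before (older black): the ellipsis body sits on its own line.
class TextBefore(ABC):
    @property
    @abstractmethod
    def id(self) -> str:
        ...

# After (black 24.x stable style): the ellipsis is hugged onto the signature line.
class TextAfter(ABC):
    @property
    @abstractmethod
    def id(self) -> str: ...
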
2 changes: 1 addition & 1 deletion .flake8
@@ -1,5 +1,5 @@
 [flake8]
-max-line-length = 120
+max-line-length = 125
 extend-ignore = E203
 per-file-ignores = **/*.pyi:E252,E301,E302,E305,E501,E701,E704,E741,F401,F811,F821
 exclude =
12 changes: 4 additions & 8 deletions machine/annotations/range.py
@@ -69,25 +69,21 @@ def __repr__(self) -> str:
 class _RangeFactory(ABC, Generic[Offset]):
     @property
     @abstractmethod
-    def include_endpoint(self) -> bool:
-        ...
+    def include_endpoint(self) -> bool: ...

     def create(self, start: Offset, end: Optional[Offset]) -> Range[Offset]:
         if end is None:
             end = start
         return Range(self, start, end)

     @abstractmethod
-    def get_length(self, start: Offset, end: Offset) -> int:
-        ...
+    def get_length(self, start: Offset, end: Offset) -> int: ...

     @abstractmethod
-    def iterate(self, start: Offset, end: Offset) -> Iterable[Offset]:
-        ...
+    def iterate(self, start: Offset, end: Offset) -> Iterable[Offset]: ...

     @abstractmethod
-    def offset_compare(self, x: Offset, y: Offset) -> int:
-        ...
+    def offset_compare(self, x: Offset, y: Offset) -> int: ...


 class _IntRangeFactory(_RangeFactory[int]):
6 changes: 2 additions & 4 deletions machine/clusterers/cluster.py
@@ -5,12 +5,10 @@

 class Cluster(Generic[T]):
     @overload
-    def __init__(self, *data_objects: T, noise: bool = False, description: Optional[str] = None) -> None:
-        ...
+    def __init__(self, *data_objects: T, noise: bool = False, description: Optional[str] = None) -> None: ...

     @overload
-    def __init__(self, data_objects: Iterable[T], noise: bool = False, description: Optional[str] = None) -> None:
-        ...
+    def __init__(self, data_objects: Iterable[T], noise: bool = False, description: Optional[str] = None) -> None: ...

     def __init__(self, *args, **kwargs) -> None:
         self._data_objects: FrozenSet[T]
3 changes: 1 addition & 2 deletions machine/clusterers/flat_clusterer.py
@@ -8,5 +8,4 @@

 class FlatClusterer(ABC, Generic[T]):
     @abstractmethod
-    def generate_clusters(self, data_objects: Iterable[T]) -> Iterable[Cluster[T]]:
-        ...
+    def generate_clusters(self, data_objects: Iterable[T]) -> Iterable[Cluster[T]]: ...
3 changes: 1 addition & 2 deletions machine/clusterers/rooted_hierarchical_clusterer.py
@@ -12,5 +12,4 @@

 class RootedHierarchicalClusterer(ABC, Generic[T]):
     @abstractmethod
-    def generate_clusters(self, data_objects: Iterable[T]) -> DiGraph[Cluster[T]]:
-        ...
+    def generate_clusters(self, data_objects: Iterable[T]) -> DiGraph[Cluster[T]]: ...
3 changes: 1 addition & 2 deletions machine/clusterers/unrooted_hierarchical_clusterer.py
@@ -12,5 +12,4 @@

 class UnrootedHierarchicalClusterer(ABC, Generic[T]):
     @abstractmethod
-    def generate_clusters(self, data_objects: Iterable[T]) -> Graph[Cluster[T]]:
-        ...
+    def generate_clusters(self, data_objects: Iterable[T]) -> Graph[Cluster[T]]: ...
3 changes: 2 additions & 1 deletion machine/clusterers/upgma_clusterer.py
@@ -97,5 +97,6 @@ def get_all_data_objects_count(tree: DiGraph[Cluster[T]], cluster: Cluster[T]) -
     if tree.out_degree(cluster) == 0:
         return len(cluster.data_objects)
     return sum(
-        (get_all_data_objects_count(tree, edge[1]) for edge in tree.out_edges(cluster)), len(cluster.data_objects)
+        (get_all_data_objects_count(tree, edge[1]) for edge in tree.out_edges(cluster)),
+        len(cluster.data_objects),
     )
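
The rewrap above also makes the second argument to sum() easier to read: it is the start value, so the recursion adds the cluster's own data-object count to the counts gathered from its child subtrees. A tiny standalone illustration of the idiom (hypothetical values, not repository code):

# sum(iterable, start): the start value is added to the running total.
child_counts = (3, 2)   # e.g. counts returned for two child subtrees
own_count = 4           # e.g. len(cluster.data_objects) at this node
total = sum(child_counts, own_count)
print(total)            # 9
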
6 changes: 2 additions & 4 deletions machine/corpora/alignment_collection.py
@@ -7,10 +7,8 @@
 class AlignmentCollection(Corpus[AlignmentRow]):
     @property
     @abstractmethod
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...

     @property
     @abstractmethod
-    def sort_key(self) -> str:
-        ...
+    def sort_key(self) -> str: ...
3 changes: 1 addition & 2 deletions machine/corpora/alignment_corpus.py
@@ -13,8 +13,7 @@
 class AlignmentCorpus(Corpus[AlignmentRow]):
     @property
     @abstractmethod
-    def alignment_collections(self) -> Iterable[AlignmentCollection]:
-        ...
+    def alignment_collections(self) -> Iterable[AlignmentCollection]: ...

     def get_rows(self, text_ids: Optional[Iterable[str]] = None) -> ContextManagedGenerator[AlignmentRow, None, None]:
         return ContextManagedGenerator(self._get_rows(text_ids))
9 changes: 6 additions & 3 deletions machine/corpora/corpus.py
@@ -17,8 +17,7 @@ def get_rows(self) -> ContextManagedGenerator[Row, None, None]:
         return ContextManagedGenerator(self._get_rows())

     @abstractmethod
-    def _get_rows(self) -> Generator[Row, None, None]:
-        ...
+    def _get_rows(self) -> Generator[Row, None, None]: ...

     def __iter__(self) -> ContextManagedGenerator[Row, None, None]:
         return self.get_rows()
@@ -28,7 +27,11 @@ def count(self, include_empty: bool = True) -> int:
         return sum(1 for row in rows if include_empty or not row.is_empty)

     def interleaved_split(
-        self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
+        self,
+        percent: Optional[float] = None,
+        size: Optional[int] = None,
+        include_empty: bool = True,
+        seed: Any = None,
     ) -> Tuple[ContextManagedGenerator[Tuple[Row, bool], None, None], int, int]:
         corpus_size = self.count(include_empty)
         split_indices = get_split_indices(corpus_size, percent, size, seed)
6 changes: 2 additions & 4 deletions machine/corpora/dictionary_alignment_corpus.py
@@ -6,12 +6,10 @@

 class DictionaryAlignmentCorpus(AlignmentCorpus):
     @overload
-    def __init__(self, *alignment_collections: AlignmentCollection) -> None:
-        ...
+    def __init__(self, *alignment_collections: AlignmentCollection) -> None: ...

     @overload
-    def __init__(self, alignment_collections: Iterable[AlignmentCollection]) -> None:
-        ...
+    def __init__(self, alignment_collections: Iterable[AlignmentCollection]) -> None: ...

     def __init__(self, *args, **kwargs) -> None:
         alignment_collections: Iterable[AlignmentCollection]
6 changes: 2 additions & 4 deletions machine/corpora/dictionary_text_corpus.py
@@ -6,12 +6,10 @@

 class DictionaryTextCorpus(TextCorpus):
     @overload
-    def __init__(self, *texts: Text) -> None:
-        ...
+    def __init__(self, *texts: Text) -> None: ...

     @overload
-    def __init__(self, texts: Iterable[Text]) -> None:
-        ...
+    def __init__(self, texts: Iterable[Text]) -> None: ...

     def __init__(self, *args, **kwargs) -> None:
         texts: Iterable[Text]
6 changes: 2 additions & 4 deletions machine/corpora/file_stream_container.py
@@ -13,11 +13,9 @@ def __init__(self, filename: StrPath) -> None:
     def __enter__(self) -> FileStreamContainer:
         return self

-    def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
-        ...
+    def __exit__(self, type: Any, value: Any, traceback: Any) -> None: ...

     def open_stream(self) -> BinaryIO:
         return open(self._filename, "rb")

-    def close(self) -> None:
-        ...
+    def close(self) -> None: ...
11 changes: 4 additions & 7 deletions machine/corpora/flatten.py
@@ -13,18 +13,15 @@


 @overload
-def flatten(corpora: Iterable[TextCorpus]) -> TextCorpus:
-    ...
+def flatten(corpora: Iterable[TextCorpus]) -> TextCorpus: ...


 @overload
-def flatten(corpora: Iterable[AlignmentCorpus]) -> AlignmentCorpus:
-    ...
+def flatten(corpora: Iterable[AlignmentCorpus]) -> AlignmentCorpus: ...


 @overload
-def flatten(corpora: Iterable[ParallelTextCorpus]) -> ParallelTextCorpus:
-    ...
+def flatten(corpora: Iterable[ParallelTextCorpus]) -> ParallelTextCorpus: ...


 def flatten(corpora: Iterable[Corpus]) -> Corpus:
@@ -35,7 +32,7 @@ def flatten(corpora: Iterable[Corpus]) -> Corpus:
     if len(corpus_list) == 1:
         return corpus_list[0]

-    if any(type(corpus_list[0]) != type(corpus) for corpus in corpus_list[1:]):
+    if any(type(corpus_list[0]) != type(corpus) for corpus in corpus_list[1:]):  # noqa: E721
         raise TypeError("All corpora must be of the same type.")

     if isinstance(corpus_list[0], TextCorpus):
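
The # noqa: E721 added above suppresses flake8's warning about comparing types with != (E721 suggests identity checks or isinstance instead). The existing check compares concrete types, which isinstance would relax to also accept subclasses, so the commit keeps the comparison and silences the lint. A short sketch of the distinction, using hypothetical stand-in classes rather than the real corpus types:

# Stand-ins for two corpus classes where one subclasses the other.
class Base: ...
class Sub(Base): ...

a, b = Base(), Sub()

# Exact-type comparison, as in flatten(); flake8 flags the != spelling (E721),
# while an identity comparison expresses the same thing without the warning.
print(type(a) != type(b))      # True  -> different concrete types
print(type(a) is not type(b))  # True  -> same result, no E721

# isinstance accepts subclasses, so it would not make this distinction.
print(isinstance(b, type(a)))  # True  -> a Sub still counts as a Base
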
26 changes: 19 additions & 7 deletions machine/corpora/parallel_text_corpus.py
@@ -87,13 +87,11 @@ def from_hf_dataset(

     @property
     @abstractmethod
-    def is_source_tokenized(self) -> bool:
-        ...
+    def is_source_tokenized(self) -> bool: ...

     @property
     @abstractmethod
-    def is_target_tokenized(self) -> bool:
-        ...
+    def is_target_tokenized(self) -> bool: ...

     def invert(self) -> ParallelTextCorpus:
         def _invert(row: ParallelTextRow) -> ParallelTextRow:
@@ -304,7 +302,11 @@ def take(self, count: int) -> ParallelTextCorpus:
         return _TakeParallelTextCorpus(self, count)

     def split(
-        self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
+        self,
+        percent: Optional[float] = None,
+        size: Optional[int] = None,
+        include_empty: bool = True,
+        seed: Any = None,
     ) -> Tuple[ParallelTextCorpus, ParallelTextCorpus, int, int]:
         corpus_size = self.count(include_empty)
         split_indices = get_split_indices(corpus_size, percent, size, seed)
@@ -594,7 +596,12 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
                     AlignedWordPair.from_string(v) if isinstance(v, str) else [AlignedWordPair(t[0], t[1]) for t in v]
                 )
             yield ParallelTextRow(
-                text_id, refs, refs, [source] if len(source) > 0 else [], [target] if len(target) > 0 else [], alignment
+                text_id,
+                refs,
+                refs,
+                [source] if len(source) > 0 else [],
+                [target] if len(target) > 0 else [],
+                alignment,
             )


@@ -670,7 +677,12 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
                 alignment = [AlignedWordPair(si, ti) for (si, ti) in zip(src_indices, trg_indices)]

             yield ParallelTextRow(
-                text_id, refs, refs, [source] if len(source) > 0 else [], [target] if len(target) > 0 else [], alignment
+                text_id,
+                refs,
+                refs,
+                [source] if len(source) > 0 else [],
+                [target] if len(target) > 0 else [],
+                alignment,
             )
             index += 1

4 changes: 3 additions & 1 deletion machine/corpora/scripture_text_corpus.py
@@ -33,7 +33,9 @@ def _get_rows(self) -> Generator[TextRow, None, None]:
             yield from self._create_rows(vref)


-def create_versification_ref_corpus(versification: Versification = ORIGINAL_VERSIFICATION) -> ScriptureTextCorpus:
+def create_versification_ref_corpus(
+    versification: Versification = ORIGINAL_VERSIFICATION,
+) -> ScriptureTextCorpus:
     return ScriptureTextCorpus(
         versification,
         (
8 changes: 5 additions & 3 deletions machine/corpora/standard_parallel_text_corpus.py
@@ -181,9 +181,11 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
                             range_info,
                             src_row,
                             trg_row,
-                            alignment.aligned_word_pairs
-                            if alignment is not None and src_row.ref == alignment.ref
-                            else None,
+                            (
+                                alignment.aligned_word_pairs
+                                if alignment is not None and src_row.ref == alignment.ref
+                                else None
+                            ),
                         )

                     source_same_ref_rows.append(src_row)
12 changes: 4 additions & 8 deletions machine/corpora/stream_container.py
@@ -6,17 +6,13 @@

 class StreamContainer(ABC):
     @abstractmethod
-    def __enter__(self) -> StreamContainer:
-        ...
+    def __enter__(self) -> StreamContainer: ...

     @abstractmethod
-    def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
-        ...
+    def __exit__(self, type: Any, value: Any, traceback: Any) -> None: ...

     @abstractmethod
-    def open_stream(self) -> BinaryIO:
-        ...
+    def open_stream(self) -> BinaryIO: ...

     @abstractmethod
-    def close(self) -> None:
-        ...
+    def close(self) -> None: ...
6 changes: 2 additions & 4 deletions machine/corpora/text.py
@@ -7,10 +7,8 @@
 class Text(Corpus[TextRow]):
     @property
     @abstractmethod
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...

     @property
     @abstractmethod
-    def sort_key(self) -> str:
-        ...
+    def sort_key(self) -> str: ...
12 changes: 7 additions & 5 deletions machine/corpora/text_corpus.py
@@ -19,13 +19,11 @@
 class TextCorpus(Corpus[TextRow]):
     @property
     @abstractmethod
-    def texts(self) -> Iterable[Text]:
-        ...
+    def texts(self) -> Iterable[Text]: ...

     @property
     @abstractmethod
-    def is_tokenized(self) -> bool:
-        ...
+    def is_tokenized(self) -> bool: ...

     def get_rows(self, text_ids: Optional[Iterable[str]] = None) -> ContextManagedGenerator[TextRow, None, None]:
         return ContextManagedGenerator(self._get_rows(text_ids))
@@ -132,7 +130,11 @@ def take(self, count: int) -> TextCorpus:
         return _TakeTextCorpus(self, count)

     def split(
-        self, percent: Optional[float] = None, size: Optional[int] = None, include_empty: bool = True, seed: Any = None
+        self,
+        percent: Optional[float] = None,
+        size: Optional[int] = None,
+        include_empty: bool = True,
+        seed: Any = None,
     ) -> Tuple[TextCorpus, TextCorpus, int, int]:
         corpus_size = self.count(include_empty)
         split_indices = get_split_indices(corpus_size, percent, size, seed)
6 changes: 2 additions & 4 deletions machine/corpora/text_file_alignment_corpus.py
@@ -9,12 +9,10 @@

 class TextFileAlignmentCorpus(DictionaryAlignmentCorpus):
     @overload
-    def __init__(self, file_patterns: Iterable[StrPath]) -> None:
-        ...
+    def __init__(self, file_patterns: Iterable[StrPath]) -> None: ...

     @overload
-    def __init__(self, *file_patterns: StrPath) -> None:
-        ...
+    def __init__(self, *file_patterns: StrPath) -> None: ...

     def __init__(self, *args, **kwargs) -> None:
         file_patterns: Iterable[str]
6 changes: 2 additions & 4 deletions machine/corpora/text_file_text_corpus.py
@@ -9,12 +9,10 @@

 class TextFileTextCorpus(DictionaryTextCorpus):
     @overload
-    def __init__(self, file_patterns: Iterable[StrPath]) -> None:
-        ...
+    def __init__(self, file_patterns: Iterable[StrPath]) -> None: ...

     @overload
-    def __init__(self, *file_patterns: StrPath) -> None:
-        ...
+    def __init__(self, *file_patterns: StrPath) -> None: ...

     def __init__(self, *args, **kwargs) -> None:
         file_patterns: Iterable[str]
6 changes: 5 additions & 1 deletion machine/corpora/text_row.py
@@ -11,7 +11,11 @@ class TextRowFlags(Flag):

 class TextRow(Sequence[str]):
     def __init__(
-        self, text_id: str, ref: Any, segment: Sequence[str] = [], flags: TextRowFlags = TextRowFlags.SENTENCE_START
+        self,
+        text_id: str,
+        ref: Any,
+        segment: Sequence[str] = [],
+        flags: TextRowFlags = TextRowFlags.SENTENCE_START,
     ) -> None:
         self._text_id = text_id
         self._ref = ref