Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

rename 'node_rank' to 'global_rank' in dataset reader 'DistributedInfo' #4608

Merged
merged 3 commits into from
Aug 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions allennlp/data/dataset_readers/dataset_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,21 @@ class WorkerInfo:
@dataclass
class DistributedInfo:
    """
    Contains information about the global process rank and total world size when the reader is being
    used within distributed training.

    From a `DatasetReader` this can be accessed with the [`get_distributed_info()`](#get_distributed_info) method.
    """

    world_size: int
    """
    The total number of processes in the distributed group.
    """

    global_rank: int
    """
    The 0-indexed ID of the current process within the distributed group.
    This will be between 0 and `world_size - 1`, inclusive.
    """


Expand Down Expand Up @@ -313,7 +314,7 @@ def _multi_worker_islice(
if max_instances is not None:
# Need to scale down max_instances because otherwise each process would read self.max_instances,
# but we really want self.max_instances total across all processes.
if self._distributed_info.node_rank < (
if self._distributed_info.global_rank < (
max_instances % self._distributed_info.world_size
):
max_instances = max_instances // self._distributed_info.world_size + 1
Expand All @@ -323,7 +324,7 @@ def _multi_worker_islice(
if not self.manual_distributed_sharding:
sharded_slice = itertools.islice(
sharded_slice,
self._distributed_info.node_rank,
self._distributed_info.global_rank,
None,
self._distributed_info.world_size,
)
Expand Down
8 changes: 4 additions & 4 deletions tests/data/dataset_readers/dataset_reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ def test_instance_slicing(
minimum_expected_result_size //= world_size
minimum_expected_result_size //= num_workers
maximum_expected_result_size = minimum_expected_result_size + 1
for node_rank in range(world_size):
for global_rank in range(world_size):
monkeypatch.setattr(common_util, "is_distributed", lambda: True)
monkeypatch.setattr(dist, "get_rank", lambda: node_rank)
monkeypatch.setattr(dist, "get_rank", lambda: global_rank)
monkeypatch.setattr(dist, "get_world_size", lambda: world_size)
for worker_id in range(num_workers):
reader = reader_class(max_instances=max_instances)
Expand All @@ -137,9 +137,9 @@ def test_instance_slicing(
elif world_size is not None:
minimum_expected_result_size //= world_size
maximum_expected_result_size = minimum_expected_result_size + 1
for node_rank in range(world_size):
for global_rank in range(world_size):
monkeypatch.setattr(common_util, "is_distributed", lambda: True)
monkeypatch.setattr(dist, "get_rank", lambda: node_rank)
monkeypatch.setattr(dist, "get_rank", lambda: global_rank)
monkeypatch.setattr(dist, "get_world_size", lambda: world_size)
reader = reader_class(max_instances=max_instances)
result = set(
Expand Down