Fix some typos (colective, fuctools, etc.) (PaddlePaddle#61745)
co63oc committed Feb 19, 2024
1 parent 362be32 commit e134807
Showing 20 changed files with 74 additions and 76 deletions.
@@ -1312,7 +1312,7 @@ def backward(

def _remove_collective_ops(self, program, name):
"""
- colective init op should call once, so remove other call.
+ collective init op should call once, so remove other call.
"""
block = program.global_block()
for ids, op in list(enumerate(block.ops)):
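For context, the intent described in the docstring above (keep the first collective init op, drop later duplicate calls) boils down to a keep-first filter. A minimal, framework-free sketch, using plain tuples rather than Paddle's Block/Operator objects and a `c_comm_init`-style op type chosen for illustration:

```python
# Minimal sketch: keep only the first occurrence of a collective init op.
# `ops` is a plain list of (op_type, attrs) tuples standing in for block.ops;
# this is illustrative only, not the Paddle Block API.

def remove_duplicate_init_ops(ops, init_op_type="c_comm_init"):
    seen = False
    kept = []
    for op_type, attrs in ops:
        if op_type == init_op_type:
            if seen:
                continue  # drop every call after the first one
            seen = True
        kept.append((op_type, attrs))
    return kept


ops = [("c_comm_init", {}), ("mul", {}), ("c_comm_init", {}), ("sum", {})]
assert [t for t, _ in remove_duplicate_init_ops(ops)] == ["c_comm_init", "mul", "sum"]
```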
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
"""Definition of Server and Worker."""

- # NOTE: reduce removed in fuctools in python3
+ # NOTE: reduce removed in functools in python3
from functools import reduce

from . import ps_pb2 as pslib
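The note refers to the Python 3 change where `reduce` is no longer a builtin and must be imported from `functools`. A quick illustration of the kind of fold it is typically used for, such as multiplying out a shape:

```python
from functools import reduce  # a builtin in Python 2, moved to functools in Python 3

# Fold a shape into an element count, a typical use of reduce.
shape = [8, 128, 1024]
numel = reduce(lambda a, b: a * b, shape, 1)
assert numel == 8 * 128 * 1024
```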
@@ -133,10 +133,10 @@ def add_sparse_table(self, table_id, strategy):
if key not in support_sparse_key_list:
raise ValueError("strategy key '%s' not support" % (key))

- support_table_calss = ['DownpourSparseTable', 'DownpourSparseSSDTable']
+ support_table_class = ['DownpourSparseTable', 'DownpourSparseSSDTable']
if strategy.get('sparse_table_class') is not None:
table_class = strategy.get('sparse_table_class')
- if table_class not in support_table_calss:
+ if table_class not in support_table_class:
raise ValueError(
"support sparse_table_class: [ 'DownpourSparseTable', 'DownpourSparseSSDTable'], \
but actual %s"
@@ -232,7 +232,7 @@ def _if_last_block(self, op, _equal_dict):
return False
return True

- def _generte_cond_para_map(
+ def _generate_cond_para_map(
self, op, _fill_value_dict, _equal_fill_dict, _now_program, _all_params
):
# generate cond value to parameter map recursively
@@ -257,7 +257,7 @@ def _generte_cond_para_map(
ops_cond = _now_program.block(int(op.attr('sub_block').id)).ops
for op in ops_cond:
if op.type == 'conditional_block':
- self._generte_cond_para_map(
+ self._generate_cond_para_map(
op,
_fill_value_dict,
_equal_fill_dict,
@@ -540,7 +540,7 @@ def _minimize(
if op.type == 'equal':
equal_fill_dict[op.output('Out')[0]] = op.input('Y')[0]
if op.type == 'conditional_block':
- self._generte_cond_para_map(
+ self._generate_cond_para_map(
op,
fill_value_dict,
equal_fill_dict,
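As a rough sketch of the recursive pattern in `_generate_cond_para_map` (walk the ops of a block, and whenever a conditional block is found, record which parameters its condition guards, then recurse into the sub-block). The dictionary-based op representation below is a hypothetical stand-in, not Paddle's program IR:

```python
# Hypothetical nested-block structure: each block is a list of ops, and a
# conditional op carries the sub-block it guards plus the condition name.
def build_cond_param_map(block, cond_para_map):
    for op in block:
        if op["type"] == "conditional_block":
            cond_name = op["cond"]
            sub_block = op["sub_block"]
            params = [o["param"] for o in sub_block if o["type"] == "assign_param"]
            cond_para_map.setdefault(cond_name, []).extend(params)
            # Recurse so conditions nested inside conditions are covered too.
            build_cond_param_map(sub_block, cond_para_map)
    return cond_para_map


inner = [{"type": "assign_param", "param": "fc_1.w_0"}]
outer = [
    {"type": "assign_param", "param": "fc_0.w_0"},
    {"type": "conditional_block", "cond": "cond_1", "sub_block": inner},
]
program = [{"type": "conditional_block", "cond": "cond_0", "sub_block": outer}]
print(build_cond_param_map(program, {}))
# {'cond_0': ['fc_0.w_0'], 'cond_1': ['fc_1.w_0']}
```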
4 changes: 2 additions & 2 deletions python/paddle/incubate/distributed/fleet/role_maker.py
@@ -30,7 +30,7 @@ class Role:

class MockBarrier:
"""
- MockBarrier is a empty impletation for barrier
+ MockBarrier is a empty implementation for barrier
mock as a real barrier for never-barrier in a specific scenario
"""

@@ -869,7 +869,7 @@ def server_index(self):

def worker_num(self):
"""
- retrun the current number of worker
+ return the current number of worker
"""
if not self._role_is_generated:
self.generate_role()
10 changes: 5 additions & 5 deletions python/paddle/incubate/distributed/utils/io/dist_save.py
@@ -57,10 +57,10 @@ def save(state_dict, path, **configs):
If True, save the file in the c++ binary format when saving a single static graph variable; otherwise, save it in pickle format.
Default: False.
2. gather_to(int|list|tuple|None):
- To specify which global rank to save in.Defalut is None.
+ To specify which global rank to save in.Default is None.
None value means distributed saving with no gathering to a single card.
3. state_type(str):
- Value can be 'params' or 'opt', specifying to save parametres or optimizer state.
+ Value can be 'params' or 'opt', specifying to save parameters or optimizer state.
4. max_grouped_size(str|int):
To limit the max size(how many bits) a object group to be transfered a time.
If str, the format must be as num+'G/M/K', for example, 3G, 2K, 10M, etc. Default is 3G.
@@ -86,7 +86,7 @@ def save(state_dict, path, **configs):
>>> # gather params to rank 0 and then save
>>> paddle.incubate.distributed.utils.io.save(model.state_dict(), path="path/to/save.pdparams", gather_to=[0], state_type="params")
- >>> # save whoe params on all ranks
+ >>> # save whole params on all ranks
>>> paddle.incubate.distributed.utils.io.save(model.state_dict(), path="path/to/save.pdparams", gather_to=[0,1], state_type="params")
>>> # save optimizer state dict on rank 0
@@ -343,7 +343,7 @@ def _grouped_gather_data_dict(state_data_dict, dst, group, max_size):
f"s list size: {sum(len(s) for s in s_list)} output: {len(output_state)}"
)

- # Because each size of groups may be different, here we should wait all objetcs gatherd.
+ # Because each size of groups may be different, here we should wait all objects gatherd.
# The while block breaks until all objects from every rank are empty, which means all of the objects transforming is done.
while True:
s_list = []
@@ -375,7 +375,7 @@ def _grouped_gather_data_dict(state_data_dict, dst, group, max_size):

def _same_keys(state_dict, group):
"""
- Check whther all keys in each dict in the group are the same.
+ Check whether all keys in each dict in the group are the same.
Used in sharding strategy to determine whether a dict needs to be gathered.
"""
keys = list(state_dict.keys())
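The check in `_same_keys` reduces to comparing every rank's key list after they have been gathered (e.g. via an all-gather of `list(state_dict.keys())`). A pure-Python sketch of that comparison, with the collective itself left out:

```python
def keys_match_across_ranks(gathered_keys):
    """gathered_keys: one list of dict keys per rank, e.g. the result of an all-gather."""
    first = gathered_keys[0]
    return all(keys == first for keys in gathered_keys[1:])


# Rank 2 is missing 'linear_1.b_0', so the dicts should not be gathered as-is.
gathered = [
    ["linear_1.w_0", "linear_1.b_0"],
    ["linear_1.w_0", "linear_1.b_0"],
    ["linear_1.w_0"],
]
assert keys_match_across_ranks(gathered) is False
```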
18 changes: 9 additions & 9 deletions python/paddle/incubate/distributed/utils/io/save_for_auto.py
@@ -201,16 +201,16 @@ def _unset_dims_mapping(param):
def _get_dims_mapping(dist_parameter, mp_group):
"""
Description:
- return the sliting mapping:
+ return the splitting mapping:
{tensor_name: spiting_strategy}
Args:
dist_parameters(list): distributed model parameters
mp_group(ProcessGroup): Model Parallel communication group
Return:
- The sliting mapping
+ The splitting mapping
Examples:
- spliting_strategy's format (-1, -1, -1, 0), meaing the dims
- of the tennsor is 4 and it is splited along the first strategy axis in mesh
+ splitting_strategy's format (-1, -1, -1, 0), meaning the dims
+ of the tensor is 4 and it is splited along the first strategy axis in mesh
Mesh Examples: (2, 4) means dp=2, mp=4
@@ -220,9 +220,9 @@ def _get_dims_mapping(dist_parameter, mp_group):

dist_shape = np.array(dist_parameter.shape)
if hasattr(dist_parameter, "split_axis"):
- aixs = dist_parameter.split_axis
+ axis = dist_parameter.split_axis
mapping = [-1 for _ in dist_shape]
- mapping[aixs] = 1
+ mapping[axis] = 1
logger.debug(
f"{dist_parameter.name} has attr split_axis: mapping: {mapping}"
)
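A small sketch of how such a dims mapping is assembled under the convention described above: every tensor dimension starts as -1 (replicated), and the sharded dimension records the index of the mesh axis it is split along (1 for the model-parallel axis here, mirroring the branch above). Shapes and values are illustrative:

```python
def dims_mapping_for(shape, split_axis=None, mesh_axis=1):
    """Per-dimension mapping: -1 = replicated, otherwise the mesh axis it is split along."""
    mapping = [-1 for _ in shape]
    if split_axis is not None:
        mapping[split_axis] = mesh_axis
    return mapping


# A column-parallel weight of shape [4096, 1024] split along dim 1 on the mp axis:
print(dims_mapping_for([4096, 1024], split_axis=1))   # [-1, 1]
# A replicated bias is not split at all:
print(dims_mapping_for([1024]))                       # [-1]
```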
@@ -280,7 +280,7 @@ def _name_mapping_dist2single(state_dict, pp_group):
logger.debug(f"matched: {k}: {matched}")
assert (
matched is not None
), f"the name of param, '{k}', is not satisfyied the format 'name_idx.xxx'"
), f"the name of param, '{k}', is not satisfied the format 'name_idx.xxx'"
name_idx = k[matched.start() : matched.end()]
logger.debug(f"get param_type_idx: {name_idx}")

@@ -294,7 +294,7 @@ def _name_mapping_dist2single(state_dict, pp_group):
param_types[name] = [0] * pp_group.nranks
param_types[name][pp] += 1

- # check if continous
+ # check if continuous
types_idx = {}
for _, v in param_type_idx.items():
if v[0] not in types_idx:
@@ -304,7 +304,7 @@ def _name_mapping_dist2single(state_dict, pp_group):
for k, v in types_idx.items():
assert v == list(
range(v[0], v[-1] + 1)
), f"{k} is not continous: {v}"
), f"{k} is not continuous: {v}"

logger.debug(f"param type: {param_types}")

10 changes: 5 additions & 5 deletions python/paddle/incubate/multiprocessing/reductions.py
@@ -133,9 +133,9 @@ def _rebuild_cuda_tensor(
)
# We only cache cuda shared tensor here.
# The opening cost of cudaIpcMemoryHandle is very high.
- # Since we cache the recived tensor directly,
+ # Since we cache the received tensor directly,
# The sender may reallocate the tensor space,
- # you should manualy maintian the lifecycle of ipc tensor
+ # you should manually maintain the lifecycle of ipc tensor
shared_cache[(handle, offset_bytes)] = lodtensor
else:
lodtensor = paddle.base.core.LoDTensor()
@@ -159,17 +159,17 @@ def _reduce_lodtensor(lodtensor):
):
for dim in lodtensor.shape():
if dim == 0:
- # Empty tensors have nothing be mmapped.
+ # Empty tensors have nothing be mapped.
return (_rebuild_lodtensor_empty, (type(lodtensor),))

- # Default use share filename stratege
+ # Default use share filename strategy
metadata = (
lodtensor._share_filename()
) # ipc_name, size, type_idx, dims, lod
rebuild = _rebuild_lodtensor_filename
lodtensor._shared_incref()
# TODO, maintain reference for lodtensor
- # TODO: support file_discriptor stratege
+ # TODO: support file_descriptor strategy
elif lodtensor._place().is_gpu_place():
metadata = lodtensor._share_cuda()
rebuild = _rebuild_cuda_tensor
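These reducers follow the standard `multiprocessing` reduction protocol: the reducer returns a `(rebuild_fn, args)` pair, and the receiving process calls `rebuild_fn(*args)`. A minimal sketch with a toy type, leaving out the Paddle-specific IPC handles:

```python
from multiprocessing.reduction import ForkingPickler


class Payload:
    def __init__(self, data):
        self.data = data


def _rebuild_payload(data):
    # Runs in the receiving process to reconstruct the object from metadata.
    return Payload(data)


def _reduce_payload(obj):
    # Returns (rebuild_fn, args); real tensor reducers put shared-memory or IPC handles in args.
    return (_rebuild_payload, (obj.data,))


ForkingPickler.register(Payload, _reduce_payload)
```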
2 changes: 1 addition & 1 deletion python/paddle/incubate/nn/functional/fused_transformer.py
@@ -537,7 +537,7 @@ def fused_multi_head_attention(
name=None,
):
r"""
- Attention mapps queries and a set of key-value pairs to outputs, and
+ Attention maps queries and a set of key-value pairs to outputs, and
Multi-Head Attention performs multiple parallel attention to jointly attending
to information from different representation subspaces. This API only
support self_attention. The pseudo code is as follows:
2 changes: 1 addition & 1 deletion python/paddle/incubate/nn/layer/fused_transformer.py
@@ -195,7 +195,7 @@ def extra_repr(self):

class FusedMultiHeadAttention(Layer):
"""
- Attention mapps queries and a set of key-value pairs to outputs, and
+ Attention maps queries and a set of key-value pairs to outputs, and
Multi-Head Attention performs multiple parallel attention to jointly attending
to information from different representation subspaces.
Please refer to `Attention Is All You Need <https://arxiv.org/pdf/1706.03762.pdf>`_
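For orientation, the multi-head attention computation this docstring refers to can be sketched in plain NumPy (no mask, dropout, or fusion; a sketch of the standard formulation from the cited paper, not the fused kernel itself):

```python
import numpy as np


def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)


def multi_head_self_attention(x, wq, wk, wv, wo, num_heads):
    # x: [seq, d_model]; wq/wk/wv/wo: [d_model, d_model]
    seq, d_model = x.shape
    d_head = d_model // num_heads

    def split_heads(t):
        # [seq, d_model] -> [num_heads, seq, d_head]
        return t.reshape(seq, num_heads, d_head).transpose(1, 0, 2)

    q, k, v = split_heads(x @ wq), split_heads(x @ wk), split_heads(x @ wv)
    scores = q @ k.transpose(0, 2, 1) / np.sqrt(d_head)   # [heads, seq, seq]
    out = softmax(scores) @ v                              # [heads, seq, d_head]
    out = out.transpose(1, 0, 2).reshape(seq, d_model)     # concatenate heads
    return out @ wo


rng = np.random.default_rng(0)
x = rng.standard_normal((8, 16))
w = [rng.standard_normal((16, 16)) for _ in range(4)]
y = multi_head_self_attention(x, *w, num_heads=4)
assert y.shape == (8, 16)
```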
4 changes: 2 additions & 2 deletions python/paddle/incubate/optimizer/functional/bfgs.py
@@ -61,7 +61,7 @@ def minimize_bfgs(
tolerance_grad (float, optional): terminates if the gradient norm is smaller than this. Currently gradient norm uses inf norm. Default value: 1e-7.
tolerance_change (float, optional): terminates if the change of function value/position/parameter between two iterations is smaller than this value. Default value: 1e-9.
initial_inverse_hessian_estimate (Tensor, optional): the initial inverse hessian approximation at initial_position. It must be symmetric and positive definite. If not given, will use an identity matrix of order N, which is size of ``initial_position`` . Default value: None.
- line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the futrue. Default value: 'strong wolfe'.
+ line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the future. Default value: 'strong wolfe'.
max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
initial_step_length (float, optional): step length used in first iteration of line search. different initial_step_length may cause different optimal result. For methods like Newton and quasi-Newton the initial trial step length should always be 1.0. Default value: 1.0.
dtype ('float32' | 'float64', optional): data type used in the algorithm, the data type of the input parameter must be consistent with the dtype. Default value: 'float32'.
@@ -161,7 +161,7 @@ def body(k, done, is_converge, num_func_calls, xk, value, g1, Hk):
# -------------- compute pk -------------- #
pk = -paddle.matmul(Hk, g1)

- # -------------- compute alpha by line serach -------------- #
+ # -------------- compute alpha by line search -------------- #
if line_search_fn == 'strong_wolfe':
alpha, value, g2, ls_func_calls = strong_wolfe(
f=objective_func,
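For reference alongside the `pk = -paddle.matmul(Hk, g1)` line above, one BFGS iteration with the standard inverse-Hessian update looks roughly like this in NumPy, using a fixed step length in place of the strong-Wolfe search for brevity:

```python
import numpy as np


def bfgs_step(f_grad, x, g, H, alpha=1.0):
    """One BFGS iteration: search direction, (fixed-step) update, inverse-Hessian refresh."""
    p = -H @ g                      # search direction, as in the code above
    s = alpha * p                   # a line search would choose alpha instead
    x_new = x + s
    g_new = f_grad(x_new)
    y = g_new - g
    rho = 1.0 / (y @ s)
    I = np.eye(len(x))
    # Standard BFGS update of the inverse Hessian approximation.
    H_new = (I - rho * np.outer(s, y)) @ H @ (I - rho * np.outer(y, s)) + rho * np.outer(s, s)
    return x_new, g_new, H_new


# Minimize f(x) = 0.5 * ||x||^2, whose gradient is x.
grad = lambda x: x
x, g, H = np.array([3.0, -2.0]), np.array([3.0, -2.0]), np.eye(2)
for _ in range(5):
    x, g, H = bfgs_step(grad, x, g, H, alpha=0.5)
print(np.linalg.norm(g))  # shrinks toward 0
```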
4 changes: 2 additions & 2 deletions python/paddle/incubate/optimizer/functional/lbfgs.py
@@ -63,7 +63,7 @@ def minimize_lbfgs(
tolerance_grad (float, optional): terminates if the gradient norm is smaller than this. Currently gradient norm uses inf norm. Default value: 1e-7.
tolerance_change (float, optional): terminates if the change of function value/position/parameter between two iterations is smaller than this value. Default value: 1e-9.
initial_inverse_hessian_estimate (Tensor, optional): the initial inverse hessian approximation at initial_position. It must be symmetric and positive definite. If not given, will use an identity matrix of order N, which is size of ``initial_position`` . Default value: None.
- line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the futrue. Default value: 'strong wolfe'.
+ line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the future. Default value: 'strong wolfe'.
max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
initial_step_length (float, optional): step length used in first iteration of line search. different initial_step_length may cause different optimal result. For methods like Newton and quasi-Newton the initial trial step length should always be 1.0. Default value: 1.0.
dtype ('float32' | 'float64', optional): data type used in the algorithm, the data type of the input parameter must be consistent with the dtype. Default value: 'float32'.
@@ -240,7 +240,7 @@ def body(i, r):

pk = -r

- # -------------- compute alpha by line serach -------------- #
+ # -------------- compute alpha by line search -------------- #
if line_search_fn == 'strong_wolfe':
alpha, value, g2, ls_func_calls = strong_wolfe(
f=objective_func,
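The `pk = -r` step above comes at the end of the L-BFGS two-loop recursion, which applies the inverse-Hessian approximation to the gradient using only the stored (s, y) pairs. A compact NumPy version of the textbook recursion for reference, not Paddle's static-graph implementation:

```python
import numpy as np


def lbfgs_direction(g, s_hist, y_hist, gamma=1.0):
    """Two-loop recursion: returns p = -H*g using stored (s, y) pairs, newest last."""
    q = g.copy()
    saved = []
    for s, y in reversed(list(zip(s_hist, y_hist))):   # newest pair first
        rho = 1.0 / (y @ s)
        a = rho * (s @ q)
        q -= a * y
        saved.append((rho, a, s, y))
    r = gamma * q                                      # initial scaling H0 = gamma * I
    for rho, a, s, y in reversed(saved):               # oldest pair first
        b = rho * (y @ r)
        r += (a - b) * s
    return -r                                          # pk = -r, as in the code above


# Quadratic f(x) = x0^2 + x1^2 (Hessian 2*I): with one exact (s, y) pair and the
# usual scaling gamma = (s.y)/(y.y), the recursion reproduces the Newton direction.
g = np.array([4.0, -2.0])
s_hist = [np.array([1.0, 0.0])]
y_hist = [np.array([2.0, 0.0])]   # y = Hessian @ s
gamma = (s_hist[-1] @ y_hist[-1]) / (y_hist[-1] @ y_hist[-1])
print(lbfgs_direction(g, s_hist, y_hist, gamma))   # [-2.  1.] == -inv(2*I) @ g
```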
2 changes: 1 addition & 1 deletion python/paddle/incubate/optimizer/functional/line_search.py
@@ -212,7 +212,7 @@ def body_zoom(
phi_j, derf_j, derphi_j = phi_and_derphi(aj)

def true_fn():
- # use assing to modify the variable in-place
+ # use assign to modify the variable in-place
paddle.assign(aj, a_hi)
paddle.assign(phi_j, phi_hi)
paddle.assign(derphi_j, derphi_hi)
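For reference, the strong Wolfe conditions that this zoom step narrows in on can be written as a small check, where c1 and c2 are the usual sufficient-decrease and curvature constants:

```python
def satisfies_strong_wolfe(phi_0, derphi_0, phi_a, derphi_a, a, c1=1e-4, c2=0.9):
    """phi(t) = f(x + t*p); derphi is its derivative along the search direction p."""
    sufficient_decrease = phi_a <= phi_0 + c1 * a * derphi_0
    curvature = abs(derphi_a) <= c2 * abs(derphi_0)
    return sufficient_decrease and curvature


# Example: phi(t) = (t - 1)^2 evaluated at a = 0.9, starting from phi(0) = 1, phi'(0) = -2.
assert satisfies_strong_wolfe(phi_0=1.0, derphi_0=-2.0, phi_a=0.01, derphi_a=-0.2, a=0.9)
```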
