Fix typos across the code, doc and comments (#9443)
Armavica authored Sep 8, 2024
1 parent 254f6c5 commit f97f999
Showing 24 changed files with 37 additions and 36 deletions.
2 changes: 1 addition & 1 deletion design_notes/flexible_indexes_notes.md
@@ -71,7 +71,7 @@ An `XarrayIndex` subclass must/should/may implement the following properties/methods
- a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable))
- a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations
- `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment))
-- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coodinates))
+- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates))
- a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes)
- `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding))
- one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes))
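For orientation, the checklist above maps onto roughly the following skeleton. This is a sketch of the design note's *proposed* interface only — the class and method names are illustrative and need not match the `Index` API that xarray ultimately shipped.

```python
from typing import Any


class MyCustomIndex:  # would derive from the note's XarrayIndex base class
    """Illustrative stub following the design note's checklist."""

    def __init__(self, data: Any):
        self._data = data

    @property
    def data(self) -> Any:
        # expose index data so it can be mapped back to coordinate data
        return self._data

    def __getitem__(self, indexer: Any) -> "MyCustomIndex":
        # propagate the index through DataArray/Dataset indexing operations
        return type(self)(self._data[indexer])

    def equals(self, other: "MyCustomIndex") -> bool:
        # alignment hook; union() and intersection() would complement this
        return bool(self._data == other._data)
```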
2 changes: 1 addition & 1 deletion design_notes/grouper_objects.md
@@ -166,7 +166,7 @@ where `|` represents chunk boundaries. A simple rechunking to
```
000|111122|3333
```
-would make this resampling reduction an embarassingly parallel blockwise problem.
+would make this resampling reduction an embarrassingly parallel blockwise problem.

Similarly consider monthly-mean climatologies for which the month numbers might be
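To make "embarrassingly parallel blockwise" concrete: once chunk boundaries coincide with group boundaries, each block can be reduced on its own. Below is a hedged, dask-level sketch of the `000|111122|3333` example — the helper function is purely illustrative, not xarray's actual resampling code.

```python
import dask.array as dsa
import numpy as np

# Chunks (3, 6, 4) align with the group boundaries: 000|111122|3333
codes = dsa.from_array(np.array([0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]), chunks=(3, 6, 4))
values = dsa.from_array(np.arange(13.0), chunks=(3, 6, 4))

def block_group_sum(vals, block_codes):
    # every group in this block is complete, so a local sum is the final answer
    return np.array([vals[block_codes == u].sum() for u in np.unique(block_codes)])

# output chunk sizes = number of distinct groups per block: 1, 2, 1
result = dsa.map_blocks(block_group_sum, values, codes, chunks=((1, 2, 1),), dtype=values.dtype)
print(result.compute())  # [ 3. 18. 15. 42.]
```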
2 changes: 1 addition & 1 deletion design_notes/named_array_design_doc.md
@@ -258,7 +258,7 @@ Questions:
Variable.coarsen_reshape
Variable.rolling_window

-Variable.set_dims # split this into broadcas_to and expand_dims
+Variable.set_dims # split this into broadcast_to and expand_dims


# Reordering/Reshaping
4 changes: 2 additions & 2 deletions doc/user-guide/dask.rst
@@ -298,7 +298,7 @@ Automatic parallelization with ``apply_ufunc`` and ``map_blocks``

.. tip::

-Some problems can become embarassingly parallel and thus easy to parallelize
+Some problems can become embarrassingly parallel and thus easy to parallelize
automatically by rechunking to a frequency, e.g. ``ds.chunk(time=TimeResampler("YE"))``.
See :py:meth:`Dataset.chunk` for more.

@@ -559,7 +559,7 @@ larger chunksizes.

.. tip::

-Many time domain problems become amenable to an embarassingly parallel or blockwise solution
+Many time domain problems become amenable to an embarrassingly parallel or blockwise solution
(e.g. using :py:func:`xarray.map_blocks`, :py:func:`dask.array.map_blocks`, or
:py:func:`dask.array.blockwise`) by rechunking to a frequency along the time dimension.
Provide :py:class:`xarray.groupers.TimeResampler` objects to :py:meth:`Dataset.chunk` to do so.
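The tip in this hunk boils down to a single `chunk` call. A small sketch, assuming a recent xarray that ships `xarray.groupers.TimeResampler` plus a dask installation (the variable name `t2m` is arbitrary):

```python
import numpy as np
import pandas as pd
import xarray as xr
from xarray.groupers import TimeResampler

# hypothetical daily data spanning four years
time = pd.date_range("2000-01-01", "2003-12-31", freq="D")
ds = xr.Dataset({"t2m": ("time", np.random.rand(time.size))}, coords={"time": time})

# one dask chunk per calendar year, so a yearly reduction is blockwise
chunked = ds.chunk(time=TimeResampler("YE"))
print(chunked.chunksizes["time"])  # (366, 365, 365, 365)
```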
2 changes: 1 addition & 1 deletion doc/user-guide/data-structures.rst
@@ -289,7 +289,7 @@ pressure that were made under various conditions:
* the measurements were made on four different days;
* they were made at two separate locations, which we will represent using
their latitude and longitude; and
-* they were made using instruments by three different manufacutrers, which we
+* they were made using instruments by three different manufacturers, which we
will refer to as `'manufac1'`, `'manufac2'`, and `'manufac3'`.

.. ipython:: python
2 changes: 1 addition & 1 deletion doc/user-guide/pandas.rst
@@ -120,7 +120,7 @@ Particularly after a roundtrip, the following deviations are noted:

- a non-dimension Dataset ``coordinate`` is converted into ``variable``
- a non-dimension DataArray ``coordinate`` is not converted
-- ``dtype`` is not allways the same (e.g. "str" is converted to "object")
+- ``dtype`` is not always the same (e.g. "str" is converted to "object")
- ``attrs`` metadata is not conserved

To avoid these problems, the third-party `ntv-pandas <https://github.com/loco-philippe/ntv-pandas>`__ library offers lossless and reversible conversions between
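The dtype caveat fixed in this hunk is easy to reproduce. A minimal sketch (the coordinate name is arbitrary):

```python
import numpy as np
import xarray as xr

# a string coordinate starts out with a fixed-width numpy dtype...
da = xr.DataArray([1, 2], dims="x", coords={"x": np.array(["a", "b"])})
print(da.x.dtype)  # <U1

# ...but comes back as "object" after a pandas roundtrip, because pandas
# indexes store strings in object arrays
roundtripped = xr.DataArray.from_series(da.to_series())
print(roundtripped.x.dtype)  # object
```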
6 changes: 3 additions & 3 deletions doc/whats-new.rst
@@ -122,7 +122,7 @@ New Features
(:issue:`6610`, :pull:`8840`).
By `Deepak Cherian <https://github.com/dcherian>`_.
- Allow rechunking to a frequency using ``Dataset.chunk(time=TimeResampler("YE"))`` syntax. (:issue:`7559`, :pull:`9109`)
-Such rechunking allows many time domain analyses to be executed in an embarassingly parallel fashion.
+Such rechunking allows many time domain analyses to be executed in an embarrassingly parallel fashion.
By `Deepak Cherian <https://github.com/dcherian>`_.
- Allow per-variable specification of ```mask_and_scale``, ``decode_times``, ``decode_timedelta``
``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`).
@@ -155,7 +155,7 @@ Breaking changes

Bug fixes
~~~~~~~~~
-- Fix scatter plot broadcasting unneccesarily. (:issue:`9129`, :pull:`9206`)
+- Fix scatter plot broadcasting unnecessarily. (:issue:`9129`, :pull:`9206`)
By `Jimmy Westling <https://github.com/illviljan>`_.
- Don't convert custom indexes to ``pandas`` indexes when computing a diff (:pull:`9157`)
By `Justus Magin <https://github.com/keewis>`_.
@@ -618,7 +618,7 @@ Internal Changes
~~~~~~~~~~~~~~~~

- The implementation of :py:func:`map_blocks` has changed to minimize graph size and duplication of data.
-This should be a strict improvement even though the graphs are not always embarassingly parallel any more.
+This should be a strict improvement even though the graphs are not always embarrassingly parallel any more.
Please open an issue if you spot a regression. (:pull:`8412`, :issue:`8409`).
By `Deepak Cherian <https://github.com/dcherian>`_.
- Remove null values before plotting. (:pull:`8535`).
6 changes: 3 additions & 3 deletions xarray/coding/cftime_offsets.py
@@ -740,7 +740,7 @@ def _generate_anchored_deprecated_frequencies(
return pairs


-_DEPRECATED_FREQUENICES: dict[str, str] = {
+_DEPRECATED_FREQUENCIES: dict[str, str] = {
"A": "YE",
"Y": "YE",
"AS": "YS",
@@ -766,7 +766,7 @@ def _generate_anchored_deprecated_frequencies(


def _emit_freq_deprecation_warning(deprecated_freq):
-recommended_freq = _DEPRECATED_FREQUENICES[deprecated_freq]
+recommended_freq = _DEPRECATED_FREQUENCIES[deprecated_freq]
message = _DEPRECATION_MESSAGE.format(
deprecated_freq=deprecated_freq, recommended_freq=recommended_freq
)
@@ -792,7 +792,7 @@ def to_offset(
freq_data = match.groupdict()

freq = freq_data["freq"]
-if warn and freq in _DEPRECATED_FREQUENICES:
+if warn and freq in _DEPRECATED_FREQUENCIES:
_emit_freq_deprecation_warning(freq)
multiples = freq_data["multiple"]
multiples = 1 if multiples is None else int(multiples)
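To see the renamed table in action, one can call the `to_offset` helper shown above with a deprecated alias; per the code in this hunk, the warning recommends the replacement recorded in `_DEPRECATED_FREQUENCIES`. A sketch — note this is an internal helper, so the import path and warning wording may shift between versions:

```python
import warnings
from xarray.coding.cftime_offsets import to_offset

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    offset = to_offset("A")  # "A" is deprecated in favor of "YE"

print(offset)             # a year-end offset object
print(caught[0].message)  # deprecation message pointing to "YE"
```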
2 changes: 1 addition & 1 deletion xarray/core/dataset.py
@@ -9750,7 +9750,7 @@ def eval(
Calculate an expression supplied as a string in the context of the dataset.
This is currently experimental; the API may change particularly around
-assignments, which currently returnn a ``Dataset`` with the additional variable.
+assignments, which currently return a ``Dataset`` with the additional variable.
Currently only the ``python`` engine is supported, which has the same
performance as executing in python.
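As the (experimental) docstring above notes, an assignment expression hands back a new `Dataset` carrying the extra variable. A quick sketch with arbitrary variable names:

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [4, 5, 6])})

print(ds.eval("a + b"))      # a plain expression returns the computed values
print(ds.eval("c = a + b"))  # an assignment returns a Dataset with "c" added
```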
2 changes: 1 addition & 1 deletion xarray/core/datatree.py
@@ -1506,7 +1506,7 @@ def to_netcdf(
mode : {"w", "a"}, default: "w"
Write ('w') or append ('a') mode. If mode='w', any existing file at
this location will be overwritten. If mode='a', existing variables
-will be overwritten. Only appies to the root group.
+will be overwritten. Only applies to the root group.
encoding : dict, optional
Nested dictionary with variable names as keys and dictionaries of
variable specific encodings as values, e.g.,
2 changes: 1 addition & 1 deletion xarray/core/datatree_ops.py
@@ -224,7 +224,7 @@ def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
The majority of the docstrings fall into a parseable pattern. Those that
-don't, just have the addendum appeneded after. None values are returned.
+don't, just have the addendum appended after. None values are returned.
"""
if docstring is None:
2 changes: 1 addition & 1 deletion xarray/core/indexes.py
@@ -1802,7 +1802,7 @@ def check_variables():

def _apply_indexes_fast(indexes: Indexes[Index], args: Mapping[Any, Any], func: str):
# This function avoids the call to indexes.group_by_index
-# which is really slow when repeatidly iterating through
+# which is really slow when repeatedly iterating through
# an array. However, it fails to return the correct ID for
# multi-index arrays
indexes_fast, coords = indexes._indexes, indexes._variables
2 changes: 1 addition & 1 deletion xarray/core/merge.py
@@ -267,7 +267,7 @@ def merge_collected(
index, other_index, variable, other_var, index_cmp_cache
):
raise MergeError(
f"conflicting values/indexes on objects to be combined fo coordinate {name!r}\n"
f"conflicting values/indexes on objects to be combined for coordinate {name!r}\n"
f"first index: {index!r}\nsecond index: {other_index!r}\n"
f"first variable: {variable!r}\nsecond variable: {other_var!r}\n"
)
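The corrected message concerns conflicting coordinate indexes, but the same `MergeError` surfaces for any irreconcilable values. A simpler sketch of triggering it:

```python
import xarray as xr

ds1 = xr.Dataset({"a": 1})
ds2 = xr.Dataset({"a": 2})

try:
    xr.merge([ds1, ds2])  # default compat="no_conflicts" cannot reconcile 1 vs 2
except xr.MergeError as err:
    print(err)  # reports the conflicting values for variable 'a'
```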
2 changes: 1 addition & 1 deletion xarray/core/variable.py
@@ -1658,7 +1658,7 @@ def reduce( # type: ignore[override]
_get_keep_attrs(default=False) if keep_attrs is None else keep_attrs
)

-# Noe that the call order for Variable.mean is
+# Note that the call order for Variable.mean is
# Variable.mean -> NamedArray.mean -> Variable.reduce
# -> NamedArray.reduce
result = super().reduce(
2 changes: 1 addition & 1 deletion xarray/datatree_/docs/source/data-structures.rst
@@ -40,7 +40,7 @@ stored under hashable keys), and so has the same key properties:
- ``dims``: a dictionary mapping of dimension names to lengths, for the variables in this node,
- ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node,
- ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node,
-- ``attrs``: dict to hold arbitary metadata relevant to data in this node.
+- ``attrs``: dict to hold arbitrary metadata relevant to data in this node.

A single ``DataTree`` object acts much like a single ``Dataset`` object, and has a similar set of dict-like methods
defined upon it. However, ``DataTree``'s can also contain other ``DataTree`` objects, so they can be thought of as nested dict-like
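A short sketch of those properties in use, assuming the `DataTree` API now shipped in xarray (at the time of this commit the class still lived in the in-repo `datatree_` port, so the import path differed):

```python
import xarray as xr

dt = xr.DataTree.from_dict(
    {
        "/": xr.Dataset(attrs={"title": "root"}),
        "/child": xr.Dataset({"foo": ("x", [1, 2, 3])}),
    }
)

print(dt.attrs)               # {'title': 'root'}
print(dt["child"].data_vars)  # dict-like view of this node's variables
```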
2 changes: 1 addition & 1 deletion xarray/datatree_/docs/source/hierarchical-data.rst
@@ -133,7 +133,7 @@ We can add Herbert to the family tree without displacing Homer by :py:meth:`~Dat
.. note::
This example shows a minor subtlety - the returned tree has Homer's brother listed as ``"Herbert"``,
-but the original node was named "Herbert". Not only are names overriden when stored as keys like this,
+but the original node was named "Herbert". Not only are names overridden when stored as keys like this,
but the new node is a copy, so that the original node that was reference is unchanged (i.e. ``herbert.name == "Herb"`` still).
In other words, nodes are copied into trees, not inserted into them.
This is intentional, and mirrors the behaviour when storing named ``xarray.DataArray`` objects inside datasets.
2 changes: 1 addition & 1 deletion xarray/plot/dataset_plot.py
@@ -737,7 +737,7 @@ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataArray
coords[key] = darray
dims.update(darray.dims)

-# Trim dataset from unneccessary dims:
+# Trim dataset from unnecessary dims:
ds_trimmed = ds.drop_dims(ds.sizes.keys() - dims) # TODO: Use ds.dims in the future

# The dataarray has to include all the dims. Broadcast to that shape
2 changes: 1 addition & 1 deletion xarray/plot/utils.py
@@ -1170,7 +1170,7 @@ def _legend_add_subtitle(handles, labels, text):

if text and len(handles) > 1:
# Create a blank handle that's not visible, the
-# invisibillity will be used to discern which are subtitles
+# invisibility will be used to discern which are subtitles
# or not:
blank_handle = plt.Line2D([], [], label=text)
blank_handle.set_visible(False)
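The invisible-handle trick in this hunk works in plain matplotlib too: a blank, hidden `Line2D` contributes only its label text to the legend. A standalone sketch (labels are arbitrary):

```python
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
(line,) = ax.plot([0, 1], [0, 1], label="data")

# hidden handle: no marker is drawn, but its label shows up as a subtitle
subtitle = plt.Line2D([], [], label="My subtitle")
subtitle.set_visible(False)

ax.legend(handles=[subtitle, line])
plt.show()
```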
3 changes: 2 additions & 1 deletion xarray/tests/test_backends.py
@@ -5041,9 +5041,10 @@ def test_extract_nc4_variable_encoding_netcdf4(self):
var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"})
_extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True)

+@pytest.mark.xfail
def test_extract_h5nc_encoding(self) -> None:
# not supported with h5netcdf (yet)
-var = xr.Variable(("x",), [1, 2, 3], {}, {"least_sigificant_digit": 2})
+var = xr.Variable(("x",), [1, 2, 3], {}, {"least_significant_digit": 2})
with pytest.raises(ValueError, match=r"unexpected encoding"):
_extract_nc4_variable_encoding(var, raise_on_invalid=True)

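For context on what the newly xfailed test pins down: the `least_significant_digit` encoding is accepted by the netCDF4 backend but not (yet) by h5netcdf. A sketch, with a made-up filename:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"t": ("x", np.random.rand(10))})

# accepted by the netCDF4 backend...
ds.to_netcdf(
    "quantized.nc",
    engine="netcdf4",
    encoding={"t": {"least_significant_digit": 2}},
)
# ...whereas h5netcdf rejects it as an unexpected encoding (ValueError),
# which is exactly the behavior the test above exercises.
```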
2 changes: 1 addition & 1 deletion xarray/tests/test_dask.py
@@ -1797,6 +1797,6 @@ def test_minimize_graph_size():
actual = len([key for key in graph if var in key[0]])
# assert that we only include each chunk of an index variable
# is only included once, not the product of number of chunks of
-# all the other dimenions.
+# all the other dimensions.
# e.g. previously for 'x', actual == numchunks['y'] * numchunks['z']
assert actual == numchunks[var], (actual, numchunks[var])
8 changes: 4 additions & 4 deletions xarray/tests/test_dataarray.py
@@ -6650,8 +6650,8 @@ def test_to_and_from_iris(self) -> None:
),
)

-for coord, orginal_key in zip((actual.coords()), original.coords):
-original_coord = original.coords[orginal_key]
+for coord, original_key in zip((actual.coords()), original.coords):
+original_coord = original.coords[original_key]
assert coord.var_name == original_coord.name
assert_array_equal(
coord.points, CFDatetimeCoder().encode(original_coord.variable).values
@@ -6726,8 +6726,8 @@ def test_to_and_from_iris_dask(self) -> None:
),
)

-for coord, orginal_key in zip((actual.coords()), original.coords):
-original_coord = original.coords[orginal_key]
+for coord, original_key in zip((actual.coords()), original.coords):
+original_coord = original.coords[original_key]
assert coord.var_name == original_coord.name
assert_array_equal(
coord.points, CFDatetimeCoder().encode(original_coord.variable).values
2 changes: 1 addition & 1 deletion xarray/tests/test_dataset.py
@@ -6742,7 +6742,7 @@ def test_pad(self, padded_dim_name, constant_values) -> None:
else:
np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim)

# check if coord "numbers" with dimention dim3 is paded correctly
# check if coord "numbers" with dimension dim3 is padded correctly
if padded_dim_name == "dim3":
assert padded["numbers"][[0, -1]].isnull().all()
# twarning: passes but dtype changes from int to float
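What the fixed comment describes — NaN-padding of a coordinate, with the int-to-float dtype change the "twarning" note mentions — shows up in a minimal sketch (names are arbitrary):

```python
import xarray as xr

da = xr.DataArray([1, 2, 3], dims="x", coords={"numbers": ("x", [10, 20, 30])})
padded = da.pad(x=1)

# the non-dimension coordinate gains NaN edges, so its dtype becomes float
print(padded["numbers"].values)  # [nan 10. 20. 30. nan]
```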
2 changes: 1 addition & 1 deletion xarray/tests/test_plot.py
@@ -2004,7 +2004,7 @@ def test_plot_rgba_image_transposed(self) -> None:
easy_array((4, 10, 15), start=0), dims=["band", "y", "x"]
).plot.imshow()

-def test_warns_ambigious_dim(self) -> None:
+def test_warns_ambiguous_dim(self) -> None:
arr = DataArray(easy_array((3, 3, 3)), dims=["y", "x", "band"])
with pytest.warns(UserWarning):
arr.plot.imshow()
10 changes: 5 additions & 5 deletions xarray/tests/test_variable.py
@@ -576,7 +576,7 @@ def test_copy_deep_recursive(self) -> None:
# lets just ensure that deep copy works without RecursionError
v.copy(deep=True)

-# indirect recusrion
+# indirect recursion
v2 = self.cls("y", [2, 3])
v.attrs["other"] = v2
v2.attrs["other"] = v
@@ -654,7 +654,7 @@ def test_aggregate_complex(self):
expected = Variable((), 0.5 + 1j)
assert_allclose(v.mean(), expected)

-def test_pandas_cateogrical_dtype(self):
+def test_pandas_categorical_dtype(self):
data = pd.Categorical(np.arange(10, dtype="int64"))
v = self.cls("x", data)
print(v) # should not error
@@ -1575,13 +1575,13 @@ def test_transpose_0d(self):
actual = variable.transpose()
assert_identical(actual, variable)

-def test_pandas_cateogrical_dtype(self):
+def test_pandas_categorical_dtype(self):
data = pd.Categorical(np.arange(10, dtype="int64"))
v = self.cls("x", data)
print(v) # should not error
assert pd.api.types.is_extension_array_dtype(v.dtype)

-def test_pandas_cateogrical_no_chunk(self):
+def test_pandas_categorical_no_chunk(self):
data = pd.Categorical(np.arange(10, dtype="int64"))
v = self.cls("x", data)
with pytest.raises(
@@ -2386,7 +2386,7 @@ def test_multiindex(self):
def test_pad(self, mode, xr_arg, np_arg):
super().test_pad(mode, xr_arg, np_arg)

-def test_pandas_cateogrical_dtype(self):
+def test_pandas_categorical_dtype(self):
data = pd.Categorical(np.arange(10, dtype="int64"))
with pytest.raises(ValueError, match="was found to be a Pandas ExtensionArray"):
self.cls("x", data)
