Resolves casact#200 and improves tail docs

attiguyas · Oct 7, 2021 · 8cddafd · 8cddafd
1 parent b0aed1c
commit 8cddafd
Show file tree

Hide file tree

Showing 5 changed files with 1,217 additions and 865 deletions.
diff --git a/chainladder/tails/bondy.py b/chainladder/tails/bondy.py
@@ -17,7 +17,7 @@ class TailBondy(TailBase):
     ----------
     earliest_age : int
         The earliest age from which the Bondy exponent is to be calculated.
-        Defaults to earliest available in the Triangle. Any available development
+        Defaults to the second-to-last age in the Triangle. Any available development
         age can be used.
     attachment_age: int (default=None)
         The age at which to attach the fitted curve.  If None, then the latest
@@ -40,9 +40,6 @@ class TailBondy(TailBase):
         sigma with tail factor applied.
     std_err_ : Triangle
         std_err with tail factor applied
-    earliest_ldf_ : DataFrame
-        Based on the ``earliest_age`` selection, this shows the seed ``ldf_`` used
-        in fitting the Bondy exponent.
     projection_period : int
         The number of months beyond the latest available development age the
         `ldf_` and `cdf_` vectors should extend.
@@ -84,7 +81,7 @@ def fit(self, X, y=None, sample_weight=None):
         super().fit(X, y, sample_weight)
 
         if self.earliest_age is None:
-            earliest_age = X.ddims[0]
+            earliest_age = X.ddims[-2]
         else:
             earliest_age = X.ddims[
                 int(

diff --git a/chainladder/tails/clark.py b/chainladder/tails/clark.py
@@ -42,7 +42,7 @@ class TailClark(TailBase):
         The scale parameter of the model.
     norm_resid_ : Triangle
         The "Normalized" Residuals of the model according to Clark.
-    projection_period : int 
+    projection_period : int
         The number of months beyond the latest available development age the
         `ldf_` and `cdf_` vectors should extend.
     """
@@ -80,30 +80,20 @@ def fit(self, X, y=None, sample_weight=None):
         model = ClarkLDF(growth=self.growth).fit(X, sample_weight=sample_weight)
         xp = X.get_array_module()
         age_offset = {"Y": 6.0, "Q": 1.5, "M": 0.5}[X.development_grain]
-        fitted = 1 / model.G_(
-            xp.array(
-                [
-                    self._ave_period[1] + X.ddims - age_offset
-                    for item in range(self._ave_period[0] + 2)
-                ]
-            )[0]
-        )
+        fitted = 1 / model.G_(self.ldf_.ddims - age_offset)
         fitted = xp.concatenate(
             (
-                fitted.values[..., :-1] / fitted.values[..., -1:],
+                fitted.values[..., :-1] / fitted.values[..., 1:],
                 fitted.values[..., -1:],
             ),
             -1,
         )
         fitted = xp.repeat(fitted, self.ldf_.values.shape[2], 2)
         attachment_age = self.attachment_age if self.attachment_age else X.ddims[-2]
-        self.ldf_.values = xp.concatenate(
-            (
-                self.ldf_.values[..., : sum(X.ddims <= attachment_age)],
-                fitted[..., -sum(X.ddims >= attachment_age) :],
-            ),
-            axis=-1,
-        )
+        self.ldf_.values = xp.concatenate((
+            self.ldf_.values[..., : sum(self.ldf_.ddims < attachment_age)],
+            fitted[..., -sum(self.ldf_.ddims >= attachment_age) :],),
+            axis=-1,)
         self.omega_ = model.omega_
         self.theta_ = model.theta_
         self.G_ = model.G_

diff --git a/chainladder/tails/curve.py b/chainladder/tails/curve.py
@@ -15,10 +15,12 @@ class TailCurve(TailBase):
     ----------
     curve : str ('exponential', 'inverse_power')
         The type of curve extrapolation you'd like to use
-    fit_period : tuple (start, stop)
+    fit_period : tuple (start, stop) or list(bool)
         A tuple representing the range of ldfs to use in the curve fit.
         The use of ``None`` will use the edge of the triangle.  For example,
         (48, None) will use development factors for age 48 and beyond.
+        Alternatively, passing a list of booleans [True, False, ...] will
+        allow for excluding (False) any development patterns from fitting.
     extrap_periods : int
         The number of development periods from attachment point to extrapolate
         the fit.
@@ -101,6 +103,8 @@ def fit(self, X, y=None, sample_weight=None):
                 "Slicing for fit_period is deprecated and will be removed. Please use a tuple (start_age, end_age)."
             )
             fit_period = self.fit_period
+        elif type(self.fit_period) is list:
+            fit_period = xp.array(self.fit_period)[None, None, None, :]
         else:
             grain = {"Y": 12, "Q": 3, "M": 1}[X.development_grain]
             start = (
@@ -118,20 +122,26 @@ def fit(self, X, y=None, sample_weight=None):
         xp = self.ldf_.get_array_module()
         _y = self.ldf_.values[..., : X.shape[-1] - 1].copy()
         _w = xp.zeros(_y.shape)
-        _w[..., fit_period] = 1.0
+        if type(fit_period) is slice:
+            _w[..., fit_period] = 1.0
+        else:
+            _w = (_w + 1) * fit_period
         if self.reg_threshold[0] is None:
-            warnings.warn("Lower threshold for ldfs not set. Lower threshold will be set to 1.0 to ensure" \
-                          "valid inputs for regression.")
+            warnings.warn(  # one message via implicit concatenation; a comma here would pass line 2 as `category`
+            "Lower threshold for ldfs not set. Lower threshold will be set to 1.0 to ensure "
+            "valid inputs for regression.")
             lower_threshold = 1
         elif self.reg_threshold[0] < 1:
-            warnings.warn("Lower threshold for ldfs set too low (<1). Lower threshold will be set to 1.0 to ensure" \
-                          "valid inputs for regression.")
+            warnings.warn(
+            "Lower threshold for ldfs set too low (<1). Lower threshold will be set to 1.0 to ensure "
+            "valid inputs for regression.")
             lower_threshold = 1
         else:
             lower_threshold = self.reg_threshold[0]
         if self.reg_threshold[1] is not None:
             if self.reg_threshold[1] <= lower_threshold:
-                warnings.warn("Can't set upper threshold for ldfs below lower threshold. Upper threshold will be set to 'None'.")
+                warnings.warn(
+                "Can't set upper threshold for ldfs below lower threshold. Upper threshold will be set to 'None'.")
                 upper_threshold = None
             else:
                 upper_threshold = self.reg_threshold[1]
@@ -144,7 +154,7 @@ def fit(self, X, y=None, sample_weight=None):
             else:
                 _w[(_y <= lower_threshold) | (_y > upper_threshold)] = 0
                 _y[(_y <= lower_threshold) | (_y > upper_threshold)] = 1.01
-        elif self.errors == "raise" and xp.any(y < 1.0):
+        elif self.errors == "raise" and xp.any(_y < 1.0):
             raise ZeroDivisionError("Tail fit requires all LDFs to be greater than 1.0")
         _y = xp.log(_y - 1)
         n_obs = X.shape[-1] - 1

diff --git a/chainladder/tails/tests/test_bondy.py b/chainladder/tails/tests/test_bondy.py
@@ -4,4 +4,4 @@
 def test_bondy1():
     tri = cl.load_sample("tail_sample")["paid"]
     dev = cl.Development(average="simple").fit_transform(tri)
-    assert round(float(cl.TailBondy().fit(dev).cdf_.values[0, 0, 0, -2]), 3) == 1.028
+    assert round(float(cl.TailBondy(earliest_age=12).fit(dev).cdf_.values[0, 0, 0, -2]), 3) == 1.028