From 99cb25ccdac8d2949e88a5305cb60aaae60f05cd Mon Sep 17 00:00:00 2001 From: AnthonyTedde Date: Sun, 9 Jan 2022 22:23:37 +0100 Subject: [PATCH] update --- NAMESPACE | 2 ++ R/{gh_outlier.R => filters.R} | 0 R/log_message.R | 21 --------------- R/misc.R | 50 +++++++++++++++++++++++++++++++++++ R/partition.R | 40 ++++++++++++++++++++++++++++ man/GH_filter.Rd | 2 +- man/create_alpha_index.Rd | 23 ++++++++++++++++ man/create_kfold.Rd | 24 +++++++++++++++++ man/log_message.Rd | 11 +++++--- 9 files changed, 147 insertions(+), 26 deletions(-) rename R/{gh_outlier.R => filters.R} (100%) delete mode 100644 R/log_message.R create mode 100644 R/misc.R create mode 100644 R/partition.R create mode 100644 man/create_alpha_index.Rd create mode 100644 man/create_kfold.Rd diff --git a/NAMESPACE b/NAMESPACE index 82d412b..469d613 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ S3method(t_outlier_test,formula) S3method(t_outlier_test,recipe) export(GH_filter) export(barycenter_distance) +export(create_alpha_index) +export(create_kfold) export(get_validation_set) export(log_message) export(outliars) diff --git a/R/gh_outlier.R b/R/filters.R similarity index 100% rename from R/gh_outlier.R rename to R/filters.R diff --git a/R/log_message.R b/R/log_message.R deleted file mode 100644 index 283ce95..0000000 --- a/R/log_message.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Title -#' -#' @param message -#' @param title -#' -#' @return -#' @export -#' -#' @examples -log_message <- function(message, title = T){ - - if(title){ - cat(paste(rep("*", 50), collapse = ""), fill = T) - cat(message, fill = T) - cat(paste(rep("*", 50), collapse = ""), fill = T) - }else{ - cat(message, fill = T) - } - - flush.console() -} diff --git a/R/misc.R b/R/misc.R new file mode 100644 index 0000000..0410fe1 --- /dev/null +++ b/R/misc.R @@ -0,0 +1,50 @@ +#' Utility function to log formated messages into the console. +#' +#' @param message A character. Message to be printed. +#' @param title logical. If TRUE, a nice hand-crafted frame is painted around the +#' undoubtedly useful message. +#' +#' @return +#' @export +#' +#' @examples +log_message <- function(message, title = T){ + + if(title){ + cat(paste(rep("*", 50), collapse = ""), fill = T) + cat(message, fill = T) + cat(paste(rep("*", 50), collapse = ""), fill = T) + }else{ + cat(message, fill = T) + } + + flush.console() +} + + +#' Create names that could be ordered alphabetically +#' +#' @description +#' Create an ordered list of names using padding of incremental values. +#' +#' @param n An integer. The number of variables +#' @param prefix A character. The prefix added before each incremental value. +#' @param sep A character. The separator used between the prefix and the numerical +#' values. +#' +#' @return The function returns a vector of well-defined alphabetically-ordered +#' names. +#' @export +#' +#' @examples +create_alpha_index <- function(n = 10, prefix, sep = "_"){ + M <- floor(log(n, base = 10)) + z <- M - floor(log(1:n, base = 10)) + idx <- paste0( + sapply(z, FUN = function(times = z) paste(rep(0, times), collapse = "")), + 1:n + ) + if(!missing(prefix)) { + paste(prefix, idx, sep = sep) + }else idx +} diff --git a/R/partition.R b/R/partition.R new file mode 100644 index 0000000..a801e42 --- /dev/null +++ b/R/partition.R @@ -0,0 +1,40 @@ +#' Create k-folds partition of a training data.frame. +#' +#' @param X The data.frame to be partitioned. +#' @param k Integer. The number of folds +#' @param strata Character. The name of the column to stratify the data.frame +#' with. +#' @param seed A number. The random seed. +#' +#' @return +#' @export +#' +#' @examples +create_kfold <- function(X, k = 10, strata, seed){ + if(!missing(seed)) set.seed(seed) + + # -- Closure function that create the data partitions -- #### + create_kfold_closure <- function(x){ + partition_size <- floor(x / k) + remainder <- x %% k + c(rep(1:k, partition_size), sample(k, size = remainder)) + } + # -- Closure function that create the data partitions -- # + + # -- Do the data partitions need to be stratified ? -- #### + if(missing(strata)){ + K <- create_kfold_closure(nrow(X)) + sample(K, size = length(K)) + }else{ + id <- X[[strata]] + id_ord <- order(id) + a <- lapply(table(id), create_kfold_closure) + K <- Reduce(c, a)[order(id_ord)] + } + # -- Do the data partitions need to be stratified ? -- # + + # -- Output kfolds with names -- #### + structure(split(1:length(K), K), + names = create_alpha_index(n = k, prefix = "split")) + # -- Output kfolds with names -- # +} diff --git a/man/GH_filter.Rd b/man/GH_filter.Rd index f9d0d91..ce54b6e 100644 --- a/man/GH_filter.Rd +++ b/man/GH_filter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gh_outlier.R +% Please edit documentation in R/filters.R \name{GH_filter} \alias{GH_filter} \title{Filter sample out using a standardized Mahalanobis distance threshold} diff --git a/man/create_alpha_index.Rd b/man/create_alpha_index.Rd new file mode 100644 index 0000000..f94f234 --- /dev/null +++ b/man/create_alpha_index.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{create_alpha_index} +\alias{create_alpha_index} +\title{Create names that could be ordered alphabetically} +\usage{ +create_alpha_index(n = 10, prefix, sep = "_") +} +\arguments{ +\item{n}{An integer. The number of variables} + +\item{prefix}{A character. The prefix added before each incremental value.} + +\item{sep}{A character. The separator used between the prefix and the numerical +values.} +} +\value{ +The function returns a vector of well-defined alphabetically-ordered +names. +} +\description{ +Create an ordered list of names using padding of incremental values. +} diff --git a/man/create_kfold.Rd b/man/create_kfold.Rd new file mode 100644 index 0000000..9ce5026 --- /dev/null +++ b/man/create_kfold.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/partition.R +\name{create_kfold} +\alias{create_kfold} +\title{Create k-folds partition of a training data.frame.} +\usage{ +create_kfold(X, k = 10, strata, seed) +} +\arguments{ +\item{X}{The data.frame to be partitioned.} + +\item{k}{Integer. The number of folds} + +\item{strata}{Character. The name of the column to stratify the data.frame +with.} + +\item{seed}{A number. The random seed.} +} +\value{ + +} +\description{ +Create k-folds partition of a training data.frame. +} diff --git a/man/log_message.Rd b/man/log_message.Rd index b182481..94d985e 100644 --- a/man/log_message.Rd +++ b/man/log_message.Rd @@ -1,17 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/log_message.R +% Please edit documentation in R/misc.R \name{log_message} \alias{log_message} -\title{Title} +\title{Utility function to log formated messages into the console.} \usage{ log_message(message, title = T) } \arguments{ -\item{title}{} +\item{message}{A character. Message to be printed.} + +\item{title}{logical. If TRUE, a nice hand-crafted frame is painted around the +undoubtedly useful message.} } \value{ } \description{ -Title +Utility function to log formated messages into the console. }