-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4fd4f02
commit e9a95fd
Showing
18 changed files
with
446 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,9 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(GH_outlier) | ||
export(log_message) | ||
export(outliars) | ||
export(t_test_outlier) | ||
export(unscale) | ||
export(workout) | ||
importFrom(purrr,"%>%") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#' Title | ||
#' | ||
#' @param message | ||
#' @param title | ||
#' | ||
#' @return | ||
#' @export | ||
#' | ||
#' @examples | ||
log_message <- function(message, title = T){ | ||
|
||
if(title){ | ||
cat(paste(rep("*", 50), collapse = ""), fill = T) | ||
cat(message, fill = T) | ||
cat(paste(rep("*", 50), collapse = ""), fill = T) | ||
}else{ | ||
cat(message, fill = T) | ||
} | ||
|
||
flush.console() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#' Outlier detection for quantitative data | ||
#' | ||
#' @param data Data.frame. Candidate data for outliers detection. | ||
#' @param std_err Integer. Which specifies how far from the mean a sample should be for being considered as an outlier. | ||
#' @param remove Boolean. Default TRUE. If TRUE, then the function returns a new dataset minus the assessed outliers. Otherwise, it returns a logical atomic vector specifying - by the boolean value False - which data is an outlier. | ||
#' @param verbose Boolean. Default TRUE. If TRUE, some of the intermediate results are output to the console. | ||
#' | ||
#' @return See remove argument. | ||
#' @export | ||
#' | ||
#' @examples | ||
outliars <- function(data, | ||
std_err = 3, | ||
remove = TRUE, | ||
verbose = TRUE){ | ||
|
||
|
||
outliars_subset <- data %>% | ||
dplyr::select_if(is.numeric) %>% | ||
purrr::map( | ||
~as.list( | ||
mean(.x, na.rm = T) + c(-1, 1) * std_err * sd(.x, na.rm = T)) %>% | ||
setNames(nm = c("lower_bound", "upper_bound") | ||
) | ||
) %>% | ||
purrr::imap( | ||
~data[[.y]] > .x$lower_bound & data[[.y]] < .x$upper_bound | ||
) %>% | ||
purrr::reduce(`*`) %>% as.logical() | ||
|
||
|
||
if(verbose){ | ||
message <- c( | ||
paste0(sum(!outliars_subset), "/", length(outliars_subset)), | ||
"rows removed" | ||
) | ||
utilitR::log_message(message = message, title = F) | ||
} | ||
|
||
|
||
if(remove) | ||
dplyr::filter(data, outliars_subset) | ||
else outliars_subset | ||
} | ||
|
||
#' Title | ||
#' | ||
#' @param formula depedent var ~ independent ones. | ||
#' @param data Candidate data. | ||
#' @param method List of methods to apply. | ||
#' @param ... | ||
#' | ||
#' @return a logical atomic vector of row outliers based on the combination of | ||
#' all performed t-test outliers detection. | ||
#' @export | ||
#' | ||
#' @examples | ||
t_test_outlier <- function(formula, data, method, verbose = T, ...){ | ||
|
||
|
||
################################### | ||
# Create the formula based on y | ||
################################### | ||
|
||
f <- dplyr::enquo(formula) %>% | ||
dplyr::as_label() %>% formula | ||
|
||
method %>% purrr::map(.f = function(m){ | ||
|
||
outliers <- rep(T, nrow(data)) | ||
ctrl <- caret::trainControl(method = "cv") | ||
|
||
if(verbose) | ||
utilitR::log_message(message = m$method) | ||
|
||
repeat{ | ||
|
||
arguments <- c(list( | ||
form = f, | ||
data = data[outliers, ], | ||
preProcess = c("center", "scale", "nzv"), | ||
trControl = ctrl | ||
), m) | ||
|
||
data_model <- do.call(caret::train, arguments) | ||
|
||
data_prediction <- predict(data_model, data[outliers, ]) | ||
residuals <- data_model$trainingData$.outcome - data_prediction | ||
|
||
outliers_current <- utilitR::outliars(data.frame(residuals), remove = F) | ||
|
||
if(all(outliers_current)) break | ||
else outliers[which(outliers)[which(!outliers_current)]] <- F | ||
|
||
|
||
} # End of the repeat loop | ||
return(outliers) | ||
}) %>% | ||
Reduce(f = `*`) %>% as.logical | ||
} | ||
|
||
#' Remove row outliers based on the standardized Mahalanobis distance of | ||
#' data points. | ||
#' | ||
#' @inheritParams outliars | ||
#' @param std_err Threshold above which data is considered as outlier. | ||
#' @param ... Argument to pass through caret::preProcess function. e.g. | ||
#' thresh. (see caret::preProcess) | ||
#' | ||
#' @return | ||
#' @export | ||
#' | ||
#' @examples | ||
GH_outlier <- function(data, | ||
variables = dplyr::everything(), | ||
std_err = 3, | ||
remove = TRUE, | ||
verbose = TRUE, | ||
...){ | ||
|
||
|
||
################################### | ||
# data management and preprocess | ||
################################### | ||
|
||
data <- data %>% | ||
dplyr::select(variables) | ||
|
||
data_pca_preproc <- data %>% | ||
caret::preProcess(method = c("center", "scale", "nzv", "pca"), ...) | ||
|
||
data_pca <- predict(data_pca_preproc, data) %>% | ||
tibble::as_tibble() | ||
|
||
|
||
################################### | ||
# Mahalanobis and GH | ||
################################### | ||
|
||
pc_mahalanobis <- data_pca %>% | ||
mahalanobis(., center = colMeans(.), cov = cov(.)) | ||
|
||
pc_GH <- pc_mahalanobis / ncol(data_pca) | ||
row_to_keep <- pc_GH < std_err | ||
|
||
|
||
################################### | ||
# log Message | ||
################################### | ||
|
||
if(verbose){ | ||
line1 = paste0("Outliers: ", sum(!row_to_keep)) | ||
line2 = paste0("Proportion: ", sum(!row_to_keep) / length(row_to_keep)) | ||
message = paste(line1, line2, sep = "\n") | ||
utilitR::log_message(message = message) | ||
} | ||
|
||
|
||
################################### | ||
# remove -> return the new dataset. | ||
################################### | ||
|
||
if(remove) | ||
data %>% | ||
dplyr::filter(row_to_keep) %>% | ||
tibble::as_tibble() | ||
else row_to_keep | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' Unscaled scaled data according to the former ones. | ||
#' | ||
#' @param scaled_data The data to by unscaled. | ||
#' @param template The template data for unscaling. | ||
#' | ||
#' @return A data.frame of unscaled data. | ||
#' @export | ||
#' | ||
#' @examples | ||
unscale <- function(scaled_data, template){ | ||
purrr::pmap_dfc(list(scaled_data, template), .f = function(s, t){ | ||
if(is.numeric(s)) | ||
s * sd(t, na.rm = T) + mean(t, na.rm = T) | ||
else s | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#' @keywords internal | ||
"_PACKAGE" | ||
|
||
#' @importFrom purrr %>% | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#' Workout | ||
#' | ||
#' @description A function to perform models training accross with different | ||
#' data. Simultaneously. | ||
#' | ||
#' @param args All the argumnets to pass through the caret::train function | ||
#' @param seed A seed. For reproducibility. | ||
#' @param subset The row subset of data to use for the analysis. All by default. | ||
#' @param features A subset of regressors to use. All by default. | ||
#' | ||
#' @return A list of caret object. | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
workout <- function(args, seed = 1010, subset = T, features = T){ | ||
args %>% purrr::map(.f = function(arg){ | ||
cat(arg$method) | ||
cat(arg$data) | ||
|
||
## Get the data from character vector denoting the variable name | ||
arg$data <- get(arg$data)[subset, ] | ||
|
||
set.seed(seed) | ||
do.call(caret::train, arg) | ||
}) | ||
} | ||
|
||
workout_prepare <- function(data_lst, method_lst, method_parameter_lst){ | ||
method_name <- names(method_lst) | ||
data_name <- names(data_lst) | ||
mdls_names <- do.call(paste, | ||
c(expand.grid(data_name, method_name), sep = "_")) | ||
args <- purrr::cross3(data_lst, method_lst, method_parameter_lst) | ||
# %>% | ||
# purrr::map(purrr::flatten) | ||
return(list( | ||
args = args, | ||
mdls_names = mdls_names | ||
)) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.