diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 7c7151e..f89e8c4 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -260,7 +260,7 @@ jobs:
         run: |
           ## Temporary workaround for https://github.com/actions/checkout/issues/766
           git config --global --add safe.directory "$GITHUB_WORKSPACE"
-          
+
           git config --local user.email "actions@github.com"
           git config --local user.name "GitHub Actions"
           Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)"
@@ -276,3 +276,4 @@ jobs:
         with:
           name: ${{ runner.os }}-biocversion-RELEASE_3_16-r-4.2-results
           path: check
+          
diff --git a/DESCRIPTION b/DESCRIPTION
index 8c0fe82..0f94ff9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,7 +28,8 @@ Encoding: UTF-8
 LazyData: true
 Biarch: true
 Depends: 
-    R (>= 4.1.0)
+    R (>= 4.1.0),
+    rstan (>= 2.18.1)
 Imports: 
     benchmarkme,
     dplyr,
@@ -44,7 +45,6 @@ Imports:
     Rcpp (>= 0.12.0),
     RcppParallel (>= 5.0.1),
     rlang,
-    rstan (>= 2.18.1),
     rstantools (>= 2.1.1),
     stats,
     tibble,
diff --git a/NAMESPACE b/NAMESPACE
index 87a9d51..33c10eb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,8 +16,6 @@ importFrom(RcppParallel,CxxFlags)
 importFrom(RcppParallel,RcppParallelLibs)
 importFrom(benchmarkme,get_ram)
 importFrom(dplyr,enquo)
-importFrom(edgeR,calcNormFactors)
-importFrom(edgeR,filterByExpr)
 importFrom(foreach,"%do%")
 importFrom(foreach,foreach)
 importFrom(graphics,par)
@@ -33,12 +31,11 @@ importFrom(purrr,map2)
 importFrom(purrr,map_chr)
 importFrom(purrr,map_int)
 importFrom(purrr,pmap)
+importFrom(purrr,reduce)
 importFrom(purrr,when)
 importFrom(rlang,":=")
-importFrom(rlang,enquo)
 importFrom(rlang,quo_is_null)
 importFrom(rlang,quo_is_symbol)
-importFrom(rstan,extract)
 importFrom(rstan,sampling)
 importFrom(rstan,summary)
 importFrom(rstan,vb)
@@ -64,6 +61,7 @@ importFrom(tidyr,separate)
 importFrom(tidyr,spread)
 importFrom(tidyr,unnest)
 importFrom(utils,head)
+importFrom(utils,install.packages)
 importFrom(utils,packageVersion)
 importFrom(utils,tail)
 useDynLib(ppcseq, .registration = TRUE)
diff --git a/R/ppcseq.R b/R/methods.R
similarity index 84%
rename from R/ppcseq.R
rename to R/methods.R
index 8ff2a1e..33d1047 100644
--- a/R/ppcseq.R
+++ b/R/methods.R
@@ -28,6 +28,7 @@
 #' @param .abundance A column name as symbol. The transcript abundance (read count)
 #' @param .significance A column name as symbol. A column with the Pvalue, or other significance measure (preferred Pvalue over false discovery rate)
 #' @param .do_check A column name as symbol. A column with a boolean indicating whether a transcript was identified as differentially abundant
+#' @param .scaling_factor In case the scaling factor must not be calculated (TMM method) using the input data but provided. It is useful, for example, for pseudobulk single-cell where the scaling might depend on sample sequencing depth for all cells rather than a particular cell type.
 #' @param percent_false_positive_genes A real between 0 and 100. It is the aimed percent of transcript being a false positive. For example, percent_false_positive_genes = 1 provide 1 percent of the calls for outlier containing transcripts that has actually not outliers.
 #' @param approximate_posterior_inference A boolean. Whether the inference of the joint posterior distribution should be approximated with variational Bayes It confers execution time advantage.
 #' @param approximate_posterior_analysis A boolean. Whether the calculation of the credible intervals should be done semi-analytically, rather than with pure sampling from the posterior. It confers execution time and memory advantage.
@@ -43,7 +44,7 @@
 #' @param seed An integer. Used for development and testing purposes
 #' @param adj_prob_theshold_2 A boolean. Used for development and testing purposes
 #'
-#' @return A nested tibble `tbl` with transcript-wise information: `sample wise data` | plot | `ppc samples failed` | `tot deleterious outliers`
+#' @return A nested tibble `tbl` with transcript-wise information: `sample_wise_data` | plot | `ppc samples failed` | `tot deleterious_outliers`
 #'
 #' @examples
 #'
@@ -77,6 +78,7 @@ identify_outliers = function(.data,
 														 .abundance,
 														 .significance,
 														 .do_check,
+														 .scaling_factor = NULL,
 														 percent_false_positive_genes = 1,
 														 how_many_negative_controls = 500,
 
@@ -100,15 +102,16 @@ identify_outliers = function(.data,
 	.abundance = enquo(.abundance)
 	.significance = enquo(.significance)
 	.do_check = enquo(.do_check)
+	.scaling_factor = enquo(.scaling_factor)
 
 	# Get factor of interest
 	#factor_of_interest = ifelse(parse_formula(formula) %>% length %>% `>` (0), parse_formula(formula)[1], "")
 
 	# Check if columns exist
-	check_columns_exist(.data, !!.sample, !!.transcript, !!.abundance, !!.significance, !!.do_check)
+	check_columns_exist(.data, !!.sample, !!.transcript, !!.abundance, !!.significance)
 
 	# Check if any column is NA or null
-	check_if_any_NA(.data, !!.sample, !!.transcript, !!.abundance, !!.significance, !!.do_check, parse_formula(formula))
+	check_if_any_NA(.data, c(quo_name(.sample), quo_name(.transcript), quo_name(.abundance), quo_name(.significance), parse_formula(formula)))
 
 	# Check if I have any genes to check
 	if(.data %>%	filter(!!.do_check) %>% nrow %>% equals(0)){
@@ -120,7 +123,7 @@ identify_outliers = function(.data,
 			d = 1L
 		) %>%
 			slice(0) %>%
-			setNames(c(quo_name(.transcript), "sample wise data", "ppc samples failed", "tot deleterious outliers")))
+			setNames(c(quo_name(.transcript), "sample_wise_data", "ppc samples failed", "tot deleterious_outliers")))
 	}
 
 
@@ -193,13 +196,15 @@ identify_outliers = function(.data,
 
 
 	# distinct_at is not released yet for dplyr, thus we have to use this trick
-	my_df <- format_input(
-		.data,
+	my_df <-
+		.data %>%
+		mutate(do_check___ = !!.do_check) %>%
+		format_input(
 		formula,
 		!!.sample,
 		!!.transcript,
 		!!.abundance,
-		!!.do_check,
+		do_check___,
 		!!.significance,
 		how_many_negative_controls
 	)
@@ -212,46 +217,65 @@ identify_outliers = function(.data,
 	# Prior info
 	lambda_mu_mu = 5.612671
 
-	# Scale dataset
-	my_df_scaled =
-		my_df %>%
-		.identify_abundant(!!.sample,!!.transcript,!!.abundance) %>%
-		get_scaled_counts_bulk(!!.sample,!!.transcript,!!.abundance) %>%
-		left_join(my_df, by=quo_name(.sample)) %>%
-		dplyr::mutate(!!as.symbol(sprintf("%s_scaled",  quo_name(.abundance))) := !!.abundance * multiplier)
-
-	# Build better scales for the inference
-	exposure_rate_multiplier =
-		my_df_scaled %>%
-		distinct(!!.sample, TMM, multiplier) %>%
-		mutate(l = multiplier %>% log) %>%
-		summarise(l %>% sd) %>%
-		pull(`l %>% sd`)
-
-	# Build better scales for the inference
-	intercept_shift_scale =
-		my_df_scaled %>%
-		mutate(cc =
-					 	!!as.symbol(sprintf(
-					 		"%s_scaled",  quo_name(.abundance)
-					 	)) %>%
-					 	`+` (1) %>% log) %>%
-		summarise(shift = cc %>% mean, scale = cc %>% sd) %>%
-		as.numeric
+
+	# Find normalisation
+	if(quo_is_null(.scaling_factor))
+		sample_scaling =
+			my_df %>%
+			.identify_abundant(!!.sample,!!.transcript,!!.abundance) %>%
+			get_scaled_counts_bulk(!!.sample,!!.transcript,!!.abundance)  %>%
+			distinct(!!.sample, multiplier)
+	else
+		sample_scaling =
+			.data %>%
+			distinct(!!.sample, multiplier= !!.scaling_factor) %>%
+			distinct()
+
+	sample_exposure =
+		sample_scaling %>%
+		mutate(exposure_rate = -log(multiplier)) %>%
+		mutate(exposure_multiplier = exp(exposure_rate)) %>%
+		rename(multiplier := !!.scaling_factor)
+
+	# # Scale dataset
+	# my_df_scaled =
+	# 	my_df %>%
+	# 	.identify_abundant(!!.sample,!!.transcript,!!.abundance) %>%
+	# 	get_scaled_counts_bulk(!!.sample,!!.transcript,!!.abundance) %>%
+	# 	left_join(my_df, by=quo_name(.sample)) %>%
+	# 	dplyr::mutate(!!as.symbol(sprintf("%s_scaled",  quo_name(.abundance))) := !!.abundance * multiplier)
+
+	# # Build better scales for the inference
+	# exposure_rate_multiplier =
+	# 	my_df_scaled %>%
+	# 	distinct(!!.sample, TMM, multiplier) %>%
+	# 	mutate(l = multiplier %>% log) %>%
+	# 	summarise(l %>% sd) %>%
+	# 	pull(`l %>% sd`)
+	#
+	# # Build better scales for the inference
+	# intercept_shift_scale =
+	# 	my_df_scaled %>%
+	# 	mutate(cc =
+	# 				 	!!as.symbol(sprintf(
+	# 				 		"%s_scaled",  quo_name(.abundance)
+	# 				 	)) %>%
+	# 				 	`+` (1) %>% log) %>%
+	# 	summarise(shift = cc %>% mean, scale = cc %>% sd) %>%
+	# 	as.numeric
 
 	# Run the first discovery phase with permissive false discovery rate
 	res_discovery =
 		my_df %>%
 		do_inference(
-			formula,!!.sample ,!!.transcript ,!!.abundance ,!!.significance ,!!.do_check,
+			formula,!!.sample ,!!.transcript ,!!.abundance ,!!.significance ,do_check___,
 			approximate_posterior_inference,
 			approximate_posterior_analysis = FALSE,
 			C,
 			X,
 			lambda_mu_mu,
 			cores,
-			exposure_rate_multiplier,
-			intercept_shift_scale,
+			sample_exposure,
 			additional_parameters_to_save,
 			adj_prob_theshold  = adj_prob_theshold_1,
 			how_many_posterior_draws = how_many_posterior_draws_1,
@@ -270,7 +294,7 @@ identify_outliers = function(.data,
 		filter(`.variable` == "counts_rng") %>%
 		ifelse_pipe(
 			do_check_only_on_detrimental,
-			~ .x %>% filter(`deleterious outliers`),
+			~ .x %>% filter(`deleterious_outliers`),
 			~ .x %>% filter(!ppc)
 		) %>%
 		distinct(S, G, .lower, .upper)
@@ -296,15 +320,14 @@ identify_outliers = function(.data,
 	res_test =
 		my_df %>%
 		do_inference(
-			formula,!!.sample ,!!.transcript ,!!.abundance ,!!.significance ,!!.do_check,
+			formula,!!.sample ,!!.transcript ,!!.abundance ,!!.significance ,do_check___,
 			approximate_posterior_inference,
 			approximate_posterior_analysis,
 			C,
 			X,
 			lambda_mu_mu,
 			cores,
-			exposure_rate_multiplier,
-			intercept_shift_scale,
+			sample_exposure,
 			additional_parameters_to_save,
 			adj_prob_theshold = adj_prob_theshold_2, # If check only deleterious is one side test
 			# * 2 because we just test one side of the distribution
@@ -320,23 +343,9 @@ identify_outliers = function(.data,
 
 	# Merge results and return
 	merge_results(
-
-		# Calculate CI 2 for discovery for plotting
 		res_discovery,
-
-		# %>%
-		# 	left_join(
-		# 		(.) %>%
-		# 			attr("fit") %>%
-		# 			fit_to_counts_rng_approximated(adj_prob_theshold_2, how_many_posterior_draws_2, truncation_compensation = 0.7352941, cores) %>%
-		# 			select(S, G, .lower_1 = .lower, .upper_1 = .upper)
-		# 	),
-
-		res_test,
-		formula,
-		!!.transcript,
-		!!.abundance,
-		!!.sample,
+		res_test  %>%	left_join(sample_exposure, by = quo_name(.sample)) ,
+		formula,!!.transcript,!!.abundance,!!.sample,
 		do_check_only_on_detrimental
 	) %>%
 
@@ -409,7 +418,7 @@ plot_credible_intervals = function(.data){
 		mutate(plot =
 					 	pmap(
 					 		list(
-					 			`sample wise data`,
+					 			`sample_wise_data`,
 					 			!!as.symbol(.transcript),
 					 			# nested data for plot
 					 			.abundance,
diff --git a/R/tidybulk.R b/R/tidybulk.R
index 5e3a1d7..ca36791 100644
--- a/R/tidybulk.R
+++ b/R/tidybulk.R
@@ -1,16 +1,15 @@
 #' Drop lowly transcribed genes for TMM normalization
 #'
 #' @keywords internal
+#' @noRd
 #'
-#' @import dplyr
-#' @importFrom  tidyr spread
+#' @importFrom tidyr spread
+#' @importFrom tidyr drop_na
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats median
-#' @importFrom edgeR filterByExpr
-#' @importFrom rlang enquo
-#' @importFrom rlang quo_is_symbol
 #' @importFrom purrr when
+#' @importFrom rlang quo_is_symbol
 #'
 #' @param .data A tibble
 #' @param .sample The name of the sample column
@@ -33,6 +32,7 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 	.transcript = enquo(.transcript)
 	.abundance = enquo(.abundance)
 
+
 	factor_of_interest = enquo(factor_of_interest)
 
 	# Check if factor_of_interest is continuous and exists
@@ -40,20 +40,15 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 
 		factor_of_interest %>%
 		when(
-
 			quo_is_symbol(.) &&
-				select(.data, !!(.)) %>%
-				lapply(class) %>%
-				as.character() %in% c("numeric", "integer", "double") ~ {
+				select(.data, !!(.)) %>% lapply(class) %>%	as.character() %in% c("numeric", "integer", "double") 	~ {
 					message("ppcseq says: The factor of interest is continuous (e.g., integer,numeric, double). The data will be filtered without grouping.")
 					NULL
 				},
-
 			quo_is_symbol(.) ~ .data %>%
 				distinct(!!.sample, !!factor_of_interest) %>%
 				arrange(!!.sample) %>%
 				pull(!!factor_of_interest),
-
 			~ NULL
 		)
 
@@ -77,7 +72,7 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 
 		# Call edgeR
 		as_matrix(rownames = !!.transcript) %>%
-		filterByExpr(
+		edgeR::filterByExpr(
 			min.count = minimum_counts,
 			group = string_factor_of_interest,
 			min.prop = minimum_proportion
@@ -88,7 +83,6 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 }
 
 # Set internal
-#' @importFrom purrr when
 .identify_abundant = 		function(.data,
 																.sample = NULL,
 																.transcript = NULL,
@@ -102,38 +96,45 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 	.transcript = enquo(.transcript)
 	.abundance = enquo(.abundance)
 
+
 	factor_of_interest = enquo(factor_of_interest)
 
 	.data %>%
 
-		{
-			gene_to_exclude =
-				add_scaled_counts_bulk.get_low_expressed(
-					.,
-					.sample = !!.sample,
-					.transcript = !!.transcript,
-					.abundance = !!.abundance,
-					factor_of_interest = !!factor_of_interest,
-					minimum_counts = minimum_counts,
-					minimum_proportion = minimum_proportion
-				)
-
-			dplyr::mutate(., .abundant := !!.transcript %in% gene_to_exclude %>% not())
-		}
+		# Filter
+		when(
+
+			# If column is present use this instead of doing more work
+			".abundant" %in% colnames(.) %>% not ~  {
+				gene_to_exclude =
+					add_scaled_counts_bulk.get_low_expressed(
+						.data,
+						.sample = !!.sample,
+						.transcript = !!.transcript,
+						.abundance = !!.abundance,
+						factor_of_interest = !!factor_of_interest,
+						minimum_counts = minimum_counts,
+						minimum_proportion = minimum_proportion
+					)
+
+				dplyr::mutate(., .abundant := !!.transcript %in% gene_to_exclude %>% not())
+			},
+			~ (.)
+		)
 }
 
 
 #' Get a tibble with scaled counts using TMM
 #'
 #' @keywords internal
+#' @noRd
 #'
 #' @import dplyr
 #' @import tibble
 #' @importFrom magrittr equals
 #' @importFrom rlang :=
 #' @importFrom stats median
-#' @importFrom purrr when
-#' @importFrom utils head
+#' @importFrom utils install.packages
 #'
 #' @param .data A tibble
 #' @param .sample The name of the sample column
@@ -142,6 +143,7 @@ add_scaled_counts_bulk.get_low_expressed <- function(.data,
 #' @param method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
 #' @param reference_sample A character string. The name of the reference sample. If NULL the sample with highest total read count will be selected as reference.
 #'
+#'
 #' @return A tibble including additional columns
 #'
 #'
@@ -150,21 +152,29 @@ get_scaled_counts_bulk <- function(.data,
 																	 .transcript = NULL,
 																	 .abundance = NULL,
 																	 method = "TMM",
-																	 reference_sample = NULL) {
+																	 reference_sample = NULL,
+																	 .library_size = NULL) {
 	# Get column names
 	.sample = enquo(.sample)
 	.transcript = enquo(.transcript)
 	.abundance = enquo(.abundance)
 
-	# Reformat input data set
-	df <-
-		.data %>%
+	.library_size = enquo(.library_size)
 
-		# Rename
-		dplyr::select(!!.sample,!!.transcript,!!.abundance) %>%
+	# # Check if package is installed, otherwise install
+	# if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
+	# 	message("ppcseq says: Installing edgeR needed for analyses")
+	# 	if (!requireNamespace("BiocManager", quietly = TRUE))
+	# 		install.packages("BiocManager", repos = "https://cloud.r-project.org")
+	# 	BiocManager::install("edgeR", ask = FALSE)
+	# }
+
+	# Set factors
+	.data =
+		.data %>%
+		dplyr::mutate(!!.sample := factor(!!.sample),!!.transcript := factor(!!.transcript)) %>%
+		droplevels()
 
-		# Set samples and genes as factors
-		dplyr::mutate(!!.sample := factor(!!.sample),!!.transcript := factor(!!.transcript))
 
 
 	# Get reference
@@ -174,7 +184,7 @@ get_scaled_counts_bulk <- function(.data,
 			!is.null(.) ~ (.),
 
 			# If not specified take most abundance sample
-			df %>%
+			.data %>%
 				group_by(!!.sample) %>%
 				summarise(sum = median(!!.abundance)) %>%
 				mutate(med = max(sum)) %>%
@@ -188,18 +198,19 @@ get_scaled_counts_bulk <- function(.data,
 
 	nf_obj <-
 		add_scaled_counts_bulk.calcNormFactor(
-			df,
+			.data,
 			reference,
 			.sample = !!.sample,
 			.transcript = !!.transcript,
 			.abundance = !!.abundance,
-			method
+			method,
+			.library_size = !!.library_size
 		)
 
 	# Calculate normalization factors
 	nf_obj$nf %>%
 		dplyr::left_join(
-			df %>%
+			.data %>%
 				group_by(!!.sample) %>%
 				summarise(tot = sum(!!.abundance, na.rm = TRUE)) %>%
 				ungroup() %>%
@@ -232,14 +243,12 @@ get_scaled_counts_bulk <- function(.data,
 #' Calculate the norm factor with calcNormFactor from limma
 #'
 #' @keywords internal
+#' @noRd
 #'
 #' @import dplyr
-#' @importFrom  tidyr spread
-#' @importFrom  tidyr drop_na
 #' @import tibble
 #' @importFrom rlang :=
 #' @importFrom stats setNames
-#' @importFrom edgeR calcNormFactors
 #'
 #' @param .data A tibble
 #' @param reference A reference matrix, not sure if used anymore
@@ -255,26 +264,34 @@ add_scaled_counts_bulk.calcNormFactor <- function(.data,
 																									.sample = `sample`,
 																									.transcript = `transcript`,
 																									.abundance = `count`,
-																									method) {
+																									method,
+																									.library_size = NULL) {
 	.sample = enquo(.sample)
 	.transcript = enquo(.transcript)
 	.abundance = enquo(.abundance)
 
-	# Get data frame for the highly transcribed transcripts
-	df.filt <-
+	.library_size = enquo(.library_size)
+
+	# Force or not library size
+	lib.size =
 		.data %>%
-		# dplyr::filter(!(!!.transcript %in% gene_to_exclude)) %>%
-		droplevels() %>%
-		select(!!.sample, !!.transcript, !!.abundance)
+		when(
+			!quo_is_null(.library_size) ~ distinct(., !!.sample, !!.library_size) %>% arrange(!!.sample) %>% pull(!!.library_size),
+			~ NULL
+		)
+
+	# Get data frame for the highly transcribed transcripts
+	df.filt <-	.data %>%	select(!!.sample, !!.transcript, !!.abundance)
 
 	# scaled data set
 	nf =
 		tibble::tibble(
+
 			# Sample factor
 			sample = factor(levels(df.filt %>% pull(!!.sample))),
 
 			# scaled data frame
-			nf = calcNormFactors(
+			nf = edgeR::calcNormFactors(
 				df.filt %>%
 					tidyr::spread(!!.sample,!!.abundance) %>%
 					tidyr::drop_na() %>%
@@ -282,7 +299,8 @@ add_scaled_counts_bulk.calcNormFactor <- function(.data,
 				refColumn = which(reference == factor(levels(
 					df.filt %>% pull(!!.sample)
 				))),
-				method = method
+				method = method,
+				lib.size = lib.size
 			)
 		) %>%
 
diff --git a/R/utilities.R b/R/utilities.R
index 57d66a9..2d184d2 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -168,7 +168,7 @@ format_for_MPI = function(df, shards, .sample) {
 		# Add counts MPI rows indexes
 		group_by(idx_MPI) %>%
 		arrange(G) %>%
-		mutate(`read count MPI row` = seq_len(length.out=n())) %>%
+		mutate(`read_count_MPI_row` = seq_len(length.out=n())) %>%
 		ungroup
 
 }
@@ -259,7 +259,7 @@ vb_iterative = function(model,
 				iter = iter,
 				tol_rel_obj = tol_rel_obj,
 				#seed = 654321,
-				pars=c("counts_rng", "exposure_rate", "alpha_sub_1", additional_parameters_to_save),
+				pars=c("counts_rng", "alpha_sub_1", additional_parameters_to_save),
 				...
 			)
 			boolFalse <- TRUE
@@ -290,10 +290,13 @@ vb_iterative = function(model,
 #' @noRd
 find_optimal_number_of_chains = function(how_many_posterior_draws,
 																				 max_number_to_check = 100) {
-	foreach(cc = 2:max_number_to_check, .combine = bind_rows) %do%
-		{
-			tibble(chains = cc, tot = how_many_posterior_draws / cc + 150 * cc)
-		} %>%
+
+
+	2:max_number_to_check %>%
+		map(
+			~ tibble(chains = .x, tot = how_many_posterior_draws / .x + 150 * .x)
+		) %>%
+		reduce(bind_rows) %>%
 		filter(tot == tot %>% min) %>%
 		pull(chains)
 
@@ -307,6 +310,7 @@ find_optimal_number_of_chains = function(how_many_posterior_draws,
 #'
 #' @importFrom tibble rowid_to_column
 #' @importFrom purrr map
+#' @importFrom purrr reduce
 #'
 #' @param counts_MPI A matrix of read count information
 #' @param to_exclude A vector of oulier data points to exclude
@@ -315,38 +319,43 @@ find_optimal_number_of_chains = function(how_many_posterior_draws,
 #' @return A matrix
 #' @noRd
 get_outlier_data_to_exlude = function(counts_MPI, to_exclude, shards) {
-	# If there are genes to exclude
-	switch(
-		to_exclude %>% nrow %>% gt(0) %>% `!` %>% sum(1),
-		foreach(s =  seq_len(length.out=shards), .combine = full_join) %do% {
-			counts_MPI %>%
-				inner_join(to_exclude, by = c("S", "G")) %>%
-				filter(idx_MPI == s) %>%
-				distinct(idx_MPI, `read count MPI row`) %>%
-				rowid_to_column %>%
-				spread(idx_MPI, `read count MPI row`) %>%
-
-				# If a shard is empty create a dummy data set to avoid error
-				ifelse_pipe((.) %>% nrow == 0, ~ tibble(rowid = 1,!!as.symbol(s) := NA))
-
-		} %>%
-
-			# Anonymous function - Add length array to the first row for indexing in MPI
-			# Input: tibble
-			# Output: tibble
-			{
-				bind_rows((.) %>% map(function(x)
-					x %>% is.na %>% `!` %>% as.numeric %>% sum) %>% unlist,
-					(.))
-			} %>%
-
-			select(-rowid) %>%
-			replace(is.na(.), 0 %>% as.integer) %>%
-			as_matrix() %>% t,
-
-		# Otherwise
-		matrix(rep(0, shards))
-	)
+
+
+	to_exclude %>%
+		when(
+			# If there are genes to exclude
+			nrow(.) %>% gt(0) ~ 	seq_len(length.out = shards) %>%
+				map(~ {
+					s = .x
+					counts_MPI %>%
+						inner_join(to_exclude, by = c("S", "G")) %>%
+						filter(idx_MPI == s) %>%
+						distinct(idx_MPI, `read_count_MPI_row`) %>%
+						rowid_to_column %>%
+						spread(idx_MPI, `read_count_MPI_row`) %>%
+
+						# If a shard is empty create a dummy data set to avoid error
+						ifelse_pipe((.) %>% nrow == 0, ~ tibble(rowid = 1, !!as.symbol(s) := NA))
+				}) %>%
+				reduce(full_join, by = "rowid") %>%
+
+				# Anonymous function - Add length array to the first row for indexing in MPI
+				# Input: tibble
+				# Output: tibble
+				{
+					bind_rows((.) %>% map(function(x)
+						x %>% is.na %>% `!` %>% as.numeric %>% sum) %>% unlist,
+						(.))
+				} %>%
+
+				select(-rowid) %>%
+				replace(is.na(.), 0 %>% as.integer) %>%
+				as_matrix() %>% t,
+
+			# Otherwise
+			~ matrix(rep(0, shards))
+		)
+
 }
 
 #' function to pass initialisation values
@@ -431,7 +440,12 @@ produce_plots = function(.x,
 			))
 		)
 
-	max_y  = .x %>% summarise(a = max(!!as.symbol(.abundance)), b = max(.upper_2)) %>% as.numeric %>% max
+	max_y  = .x %>% summarise(a = max(!!as.symbol(.abundance)), b = max(.upper)) %>% as.numeric %>% max
+
+	# Scale
+	# .x =
+	# 	.x %>%
+	# 	mutate(across(c(.abundance, .lower_2, .upper_2), ~ .x * multiplier)	)
 
 	{
 		ggplot(data = .x, aes(
@@ -448,11 +462,11 @@ produce_plots = function(.x,
 		# 	)
 	} %>%
 		when(
-			".lower_2" %in% colnames(.x) ~ (.) +
+			".lower" %in% colnames(.x) ~ (.) +
 				geom_errorbar(aes(
-					ymin = `.lower_2`,
-					ymax = `.upper_2`,
-					color = `deleterious outliers`
+					ymin = `.lower`,
+					ymax = `.upper`,
+					color = `deleterious_outliers`
 				),
 				width = 0),
 			~ (.)
@@ -461,10 +475,10 @@ produce_plots = function(.x,
 		ifelse_pipe(
 			covariate %>% is.null %>% `!`,
 			~ .x + geom_point(aes(
-				size = `exposure rate`, fill = !!as.symbol(covariate)
+				size = exposure_rate, fill = !!as.symbol(covariate)
 			), shape = 21),
 			~ .x + geom_point(
-				aes(size = `exposure rate`),
+				aes(size = exposure_rate),
 				shape = 21,
 				fill = "black"
 			)
@@ -485,14 +499,14 @@ add_deleterious_if_covariate_exists = function(.data, X){
 						select(2) %>%
 						setNames("factor or interest") %>%
 						mutate(S = seq_len(length.out=n())) %>%
-						mutate(`is group right` = `factor or interest` > mean(`factor or interest`)) ,
+						mutate(`is_group_right` = `factor or interest` > mean(`factor or interest`)) ,
 					by = "S"
 				) %>%
 
-				mutate(`is group high` = (slope > 0 & `is group right`) |  (slope < 0 & !`is group right`)) %>%
+				mutate(`is group high` = (slope > 0 & `is_group_right`) |  (slope < 0 & !`is_group_right`)) %>%
 
 				# Check if outlier might be deleterious for the statistics
-				mutate(`deleterious outliers` = (!ppc) &
+				mutate(`deleterious_outliers` = (!ppc) &
 							 	(`is higher than mean` == `is group high`)),
 			~ (.)
 		)
@@ -537,14 +551,11 @@ merge_results = function(res_discovery, res_test, formula, .transcript, .abundan
 			!!.transcript,
 			!!.abundance,
 			!!.sample,
-			mean,
-			# `.lower_1`,
-			# `.upper_1`,
-			`exposure rate`,
-			slope_1 = slope,
+			slope_before_outlier_filtering = slope,
 			one_of(parse_formula(formula))
 		) %>%
 
+
 		# Attach results of tests
 		left_join(
 			res_test %>%
@@ -552,15 +563,16 @@ merge_results = function(res_discovery, res_test, formula, .transcript, .abundan
 					select(
 						S,
 						G,
-						mean_2 = mean,
-						.lower_2 = `.lower`,
-						.upper_2 = `.upper`,
-						slope_2 = slope,
-						ppc,
-						one_of(c("generated quantities", "deleterious outliers"))
+						exposure_rate, multiplier,
+						.lower = `.lower`,
+						.upper = `.upper`,
+						slope_after_outlier_filtering = slope,
+						posterior_predictive_check_succeded = ppc,
+						one_of(c("generated quantities", "deleterious_outliers"))
 					),
 				by = c("S", "G")
 		) %>%
+		suppressWarnings() %>%
 
 		# format results
 		format_results(formula, !!.transcript, !!.abundance, !!.sample, do_check_only_on_detrimental)
@@ -579,24 +591,18 @@ format_results = function(.data, formula, .transcript, .abundance, .sample, do_c
 	.data %>%
 
 		# Check if new package is installed with different sintax
-		ifelse_pipe(
-			packageVersion("tidyr") >= "0.8.3.9000",
-			~ .x %>% nest(`sample wise data` = c(-!!.transcript)),
-			~ .x %>%
-				group_by(!!.transcript) %>%
-				nest(`sample wise data` = -!!.transcript)
-		) %>%
+		nest(`sample_wise_data` = -!!.transcript) %>%
 
 		# Add summary statistics
-		mutate(`ppc samples failed` = map_int(`sample wise data`, ~ .x %>% pull(ppc) %>% `!` %>% sum)) %>%
+		mutate(`ppc_samples_failed` = map_int(`sample_wise_data`, ~ .x %>% pull(posterior_predictive_check_succeded) %>% `!` %>% sum)) %>%
 
 		# If deleterious detection add summary as well
 		ifelse_pipe(
 			do_check_only_on_detrimental,
 			~ .x %>%
 				mutate(
-					`tot deleterious outliers` =
-						map_int(`sample wise data`, ~ .x %>% pull(`deleterious outliers`) %>% sum)
+					`tot_deleterious_outliers` =
+						map_int(`sample_wise_data`, ~ .x %>% pull(`deleterious_outliers`) %>% sum)
 				)
 		)
 }
@@ -642,43 +648,6 @@ select_to_check_and_house_keeping = function(.data, .do_check, .significance, .t
 		}
 }
 
-
-#' add_exposure_rate
-#'
-#' @keywords internal
-#'
-#'
-#' @importFrom tidyr separate
-#'
-#' @param .data A data frame
-#' @param fit A fit object
-#'
-#' @return A `tbl`
-#'
-#' @noRd
-add_exposure_rate = function(.data, fit){
-
-	writeLines(sprintf("executing %s", "add_exposure_rate"))
-
-	.data %>%
-		left_join(
-			fit %>%
-				summary("exposure_rate") %$%
-				summary %>%
-				as_tibble(rownames = ".variable") %>%
-				separate(
-					.variable,
-					c(".variable", "S"),
-					sep = "[\\[,\\]]",
-					extra = "drop"
-				) %>%
-				mutate(S = S %>% as.integer) %>%
-				rename(`exposure rate` = mean) %>%
-				select(S, `exposure rate`),
-			by = "S"
-		)
-}
-
 check_if_within_posterior = function(.data, my_df, .do_check, .abundance){
 
 	writeLines(sprintf("executing %s", "check_if_within_posterior"))
@@ -728,6 +697,7 @@ fit_to_counts_rng = function(fit, adj_prob_theshold){
 						 extra = "drop") %>%
 		mutate(S = S %>% as.integer, G = G %>% as.integer) %>%
 		select(-one_of(c("n_eff", "Rhat", "khat"))) %>%
+		suppressWarnings() %>%
 		rename(`.lower` = (.) %>% ncol - 1,
 					 `.upper` = (.) %>% ncol)
 }
@@ -760,7 +730,7 @@ fit_to_counts_rng = function(fit, adj_prob_theshold){
 #' @return A `tbl`
 #'
 #' @noRd
-fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores, how_many_to_check){
+fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores, how_many_to_check, exposure_rate){
 
 
 	writeLines(sprintf("executing %s", "fit_to_counts_rng_approximated"))
@@ -775,7 +745,7 @@ fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_poste
 	# %>% as.data.frame() %>% setNames(sprintf("sigma.%s", colnames(.) %>% gsub("V", "", .))) %>%
 	# 	as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, sigma, -.draw) %>% separate(par, c("par", "G"), sep="\\.") %>% select(-par)
 
-	draws_exposure = 	fit %>% rstan::extract("exposure_rate") %>% .[[1]]
+	# draws_exposure = 	fit %>% rstan::extract("exposure_rate") %>% .[[1]]
 
 	# %>% as.data.frame() %>% setNames(sprintf("exposure.%s", colnames(.) %>% gsub("V", "", .))) %>%
 	# 	as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, exposure, -.draw) %>% separate(par, c("par", "S"), sep="\\.") %>% select(-par)
@@ -787,10 +757,11 @@ fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_poste
 				list(  S,G, truncation_compensation),
 				~ {
 
-					i_supersampled =	seq_len(length.out=sample(length(draws_mu[,..1, ..2]), how_many_posterior_draws, replace = TRUE ))
+					i_supersampled = sample(seq_len(length(draws_mu[,..1, ..2])), how_many_posterior_draws, replace = TRUE )
+
 					draws = rnbinom(
 						n = how_many_posterior_draws,
-						mu = exp(draws_mu[,..1, ..2][i_supersampled] + draws_exposure[,..1][i_supersampled]),
+						mu = exp(draws_mu[,..1, ..2][i_supersampled] + exposure_rate[..1]),
 						size = 1/exp(draws_sigma[,..2][i_supersampled]) * ..3
 					)
 					draws %>%
@@ -809,44 +780,6 @@ fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_poste
 		mutate(.variable = "counts_rng") %>%
 		unnest(CI)
 
-	# draws_mu =
-	# 	fit %>% extract("lambda_log_param") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("mu.%s", colnames(.))) %>%
-	# 	as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, mu, -.draw) %>% separate(par, c("par", "S", "G"), sep="\\.") %>% select(-par)
-	# draws_sigma =
-	# 	fit %>% extract("sigma_raw") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("sigma.%s", colnames(.) %>% gsub("V", "", .))) %>%
-	# 	as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, sigma, -.draw) %>% separate(par, c("par", "G"), sep="\\.") %>% select(-par)
-	# draws_exposure =
-	# 	fit %>% extract("exposure_rate") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("exposure.%s", colnames(.) %>% gsub("V", "", .))) %>%
-	# 	as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, exposure, -.draw) %>% separate(par, c("par", "S"), sep="\\.") %>% select(-par)
-	#
-	# draws_mu %>%
-	# 	left_join(draws_sigma, by = c(".draw", "G")) %>%
-	# 	left_join(draws_exposure, by = c(".draw", "S")) %>%
-	# 	nest(data = -c(S, G)) %>%
-	# 	mutate(
-	# 		CI = map(
-	# 			data,
-	# 			~ {
-	# 				.x_supersampled = .x %>%	sample_n(how_many_posterior_draws, replace = TRUE)
-	# 				draws = rnbinom(n =how_many_posterior_draws,	mu = exp(.x_supersampled$mu + .x_supersampled$exposure),	size = 1/exp(.x_supersampled$sigma) * truncation_compensation	)
-	# 				draws %>%
-	# 					# Process quantile
-	# 					quantile(c(adj_prob_theshold, 1 - adj_prob_theshold)) %>%
-	# 					tibble::as_tibble(rownames="prop") %>%
-	# 					tidyr::spread(prop, value) %>%
-	# 					setNames(c(".lower", ".upper")) %>%
-	# 					# Add mean and sd
-	# 					dplyr::mutate(mean = mean(draws), sd = sd(draws))
-	# 			}
-	# 		)
-	# 	) %>%
-	# 	select(-data) %>%
-	# 	unnest(CI) %>%
-	#
-	# 	# Adapt to old dataset
-	# 	mutate(.variable = "counts_rng") %>%
-	# 	mutate(S = as.integer(S), G = as.integer(G))
-
 
 }
 
@@ -868,16 +801,15 @@ save_generated_quantities_in_case = function(.data, fit, save_generated_quantiti
 		)
 }
 
-check_columns_exist = function(.data, .sample, .transcript, .abundance, .significance, .do_check){
+check_columns_exist = function(.data, .sample, .transcript, .abundance, .significance){
 
 	# Prepare column same enquo
 	.sample = enquo(.sample)
 	.transcript = enquo(.transcript)
 	.abundance = enquo(.abundance)
 	.significance = enquo(.significance)
-	.do_check = enquo(.do_check)
 
-	columns = c(quo_name(.sample), quo_name(.transcript), quo_name(.abundance), quo_name(.significance), quo_name(.do_check))
+	columns = c(quo_name(.sample), quo_name(.transcript), quo_name(.abundance), quo_name(.significance))
 	if((!columns %in% (.data %>% colnames)) %>% any)
 		stop(
 			sprintf(
@@ -907,21 +839,14 @@ check_columns_exist = function(.data, .sample, .transcript, .abundance, .signifi
 #' @return A `tbl`
 #'
 #' @noRd
-check_if_any_NA = function(.data, .sample, .transcript, .abundance, .significance, .do_check, formula_columns){
-
-	# Prepare column same enquo
-	.sample = enquo(.sample)
-	.transcript = enquo(.transcript)
-	.abundance = enquo(.abundance)
-	.significance = enquo(.significance)
-	.do_check = enquo(.do_check)
+#'
+check_if_any_NA = function(.data, columns){
 
-	columns = c(quo_name(.sample), quo_name(.transcript), quo_name(.abundance), quo_name(.significance), quo_name(.do_check), formula_columns)
 
 	if(
 		.data %>%
 		drop_na(columns) %>%
-		nrow %>% st(	.data %>% nrow	)
+		nrow %>% st(      .data %>% nrow    )
 	)
 		stop(sprintf("There are NA values in you tibble for any of the column %s", paste(columns, collapse=", ")))
 }
@@ -1051,7 +976,6 @@ run_model = function(model, approximate_posterior_inference, chains, how_many_po
 			tol_rel_obj = tol_rel_obj,
 			pars = c(
 				"counts_rng",
-				"exposure_rate",
 				additional_parameters_to_save
 			)
 			#,
@@ -1071,7 +995,6 @@ run_model = function(model, approximate_posterior_inference, chains, how_many_po
 			init = inits_fx,
 			pars = c(
 				"counts_rng",
-				"exposure_rate",
 				additional_parameters_to_save
 			)
 		)
@@ -1162,10 +1085,10 @@ identify_outliers_1_step = function(.data,
 	#factor_of_interest = ifelse(parse_formula(formula) %>% length %>% `>` (0), parse_formula(formula)[1], "")
 
 	# Check if columns exist
-	check_columns_exist(.data, !!.sample, !!.transcript, !!.abundance, !!.significance, !!.do_check)
+	check_columns_exist(.data, !!.sample, !!.transcript, !!.abundance, !!.significance)
 
 	# Check if any column is NA or null
-	check_if_any_NA(.data, !!.sample, !!.transcript, !!.abundance, !!.significance, !!.do_check, parse_formula(formula))
+	check_if_any_NA(.data, c(quo_name(.sample), quo_name(.transcript), quo_name(.abundance), quo_name(.significance), quo_name(.do_check), parse_formula(formula)))
 
 	# Check is testing environment is supported
 	if (approximate_posterior_inference &	save_generated_quantities)
@@ -1314,8 +1237,8 @@ identify_outliers_1_step = function(.data,
 			`.lower`,
 			`.upper`,
 			ppc,
-			one_of(c("generated quantities", "deleterious outliers")),
-			`exposure rate`,
+			one_of(c("generated quantities", "deleterious_outliers")),
+			exposure_rate,
 			one_of(parse_formula(formula))
 		) %>%
 
@@ -1408,8 +1331,7 @@ do_inference = function(my_df,
 												X,
 												lambda_mu_mu,
 												cores,
-												exposure_rate_multiplier,
-												intercept_shift_scale,
+												sample_scaling,
 												additional_parameters_to_save,
 												adj_prob_theshold,
 												how_many_posterior_draws,
@@ -1435,7 +1357,7 @@ do_inference = function(my_df,
 	.do_check = enquo(.do_check)
 
 	# Check that the dataset is squared
-	if(my_df %>% distinct(!!.sample, !!.transcript) %>% count(!!.transcript) %>% count(n) %>% nrow %>% gt(1))
+	if(my_df %>% distinct(!!.sample, !!.transcript) %>% count(!!.transcript) %>% count(n, name = "nn") %>% nrow %>% gt(1))
 		stop("The input data frame does not represent a rectangular structure. Each transcript must be present in all samples.")
 
 	# Get the number of transcripts to check
@@ -1472,7 +1394,7 @@ do_inference = function(my_df,
 	# Setup dimensions of variables for the model
 	G = counts_MPI %>% distinct(G) %>% nrow()
 	S = counts_MPI %>% distinct(!!.sample) %>% nrow()
-	N = counts_MPI %>% distinct(idx_MPI,!!.abundance, `read count MPI row`) %>%  count(idx_MPI) %>% summarise(max(n)) %>% pull(1)
+	N = counts_MPI %>% distinct(idx_MPI,!!.abundance, `read_count_MPI_row`) %>%  count(idx_MPI) %>% summarise(max(n)) %>% pull(1)
 	M = counts_MPI %>% distinct(`start`, idx_MPI) %>% count(idx_MPI) %>% pull(n) %>% max
 	G_per_shard = counts_MPI %>% distinct(!!.transcript, idx_MPI) %>% count(idx_MPI) %>% pull(n) %>% as.array
 	n_shards = min(shards, counts_MPI %>% distinct(idx_MPI) %>% nrow)
@@ -1484,18 +1406,18 @@ do_inference = function(my_df,
 	# Read count object
 	counts =
 		counts_MPI %>%
-		distinct(idx_MPI,!!.abundance, `read count MPI row`)  %>%
+		distinct(idx_MPI,!!.abundance, `read_count_MPI_row`)  %>%
 		spread(idx_MPI,!!.abundance) %>%
-		select(-`read count MPI row`) %>%
+		select(-`read_count_MPI_row`) %>%
 		replace(is.na(.), 0 %>% as.integer) %>%
 		as_matrix() %>% t
 
 	# Indexes of the samples
 	sample_idx =
 		counts_MPI %>%
-		distinct(idx_MPI, S, `read count MPI row`)  %>%
+		distinct(idx_MPI, S, `read_count_MPI_row`)  %>%
 		spread(idx_MPI, S) %>%
-		select(-`read count MPI row`) %>%
+		select(-`read_count_MPI_row`) %>%
 		replace(is.na(.), 0 %>% as.integer) %>%
 		as_matrix() %>% t
 
@@ -1541,6 +1463,15 @@ do_inference = function(my_df,
 	# Dimension od the data package to pass to Stan
 	CP = ncol(counts_package)
 
+	exposure_rate =
+		sample_scaling %>%
+		left_join(
+			counts_MPI %>% distinct(!!.sample, S), by=quo_name(.sample)
+		) %>%
+		distinct(S, exposure_rate ) %>%
+		arrange(S) %>%
+		pull(exposure_rate)
+
 	# Run model
 	#writeLines(sprintf("- Roughly the memory allocation for the fit object is %s Gb", object.size(1:(S * how_many_to_check * how_many_posterior_draws))/1e9))
 
@@ -1575,7 +1506,6 @@ do_inference = function(my_df,
 				init = inits_fx,
 				pars = c(
 					"counts_rng",
-					"exposure_rate",
 					"alpha_sub_1",
 					additional_parameters_to_save
 				)
@@ -1587,16 +1517,13 @@ do_inference = function(my_df,
 
 		ifelse_pipe(
 			approximate_posterior_analysis,
-			~ .x %>% fit_to_counts_rng_approximated(adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores, how_many_to_check),
+			~ .x %>% fit_to_counts_rng_approximated(adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores, how_many_to_check, exposure_rate),
 			~ .x %>% fit_to_counts_rng(adj_prob_theshold)
 		) %>%
 
 		# If generated quantities are saved
 		save_generated_quantities_in_case(fit, save_generated_quantities) %>%
 
-		# Add exposure rate
-		#add_exposure_rate(fit) %>%
-
 		# Check if data is within posterior
 		check_if_within_posterior(my_df, .do_check, .abundance) %>%
 
@@ -1607,12 +1534,9 @@ do_inference = function(my_df,
 		add_deleterious_if_covariate_exists(X) %>%
 
 		# Add position in MPI package for next inference
-		left_join(counts_MPI %>% distinct(S, G, idx_MPI, `read count MPI row`),
+		left_join(counts_MPI %>% distinct(S, G, idx_MPI, `read_count_MPI_row`),
 							by = c("S", "G")) %>%
 
-		# Add exposure rate
-		add_exposure_rate(fit) %>%
-
 		# needed for the figure article
 		ifelse_pipe(pass_fit,	~ .x %>% add_attr(fit, "fit")	) %>%
 
diff --git a/README.Rmd b/README.Rmd
index 210bc15..860dbdb 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -10,7 +10,8 @@ knitr::opts_chunk$set( fig.path = "man/figures/")
 The input data set is a tidy representation of a differential gene transcript abundance analysis
 
 ```{r echo=FALSE, include=FALSE}
-library(tidyverse)
+library(dplyr)
+library(ggplot2)
 library(ppcseq)
 library(dplyr)
 library(magrittr)
@@ -47,8 +48,8 @@ You can identify anrtefactual calls from your differential transcribt anundance
 
 
 counts.ppc = 
-	counts %>%
-	mutate(is_significant = FDR < 0.01) %>%
+	counts |>
+	mutate(is_significant = FDR < 0.01) |>
 	identify_outliers(
 		formula = ~ Label,
 		.sample = sample, 
@@ -72,12 +73,12 @@ We can visualise the top five differentially transcribed genes
 
 ```{r }
 counts.ppc_plots = 
-	counts.ppc %>% 
+	counts.ppc |> 
 	plot_credible_intervals() 
 ```
 
 ```{r}
-counts.ppc_plots %>%
-	pull(plot) %>% 
-	.[seq_len(length.out=2)]
+counts.ppc_plots |>
+	pull(plot) |> 
+	head(2)
 ```
diff --git a/README.md b/README.md
index 50227fb..6817a90 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ writeLines(c( "CXX14FLAGS += -O3","CXX14FLAGS += -DSTAN_THREADS", "CXX14FLAGS +=
 close(fileConn)
 ```
 
-Then, install with 
+Then, install with
 
 ``` r
 devtools::install_github("stemangiola/ppcseq")
@@ -25,7 +25,8 @@ devtools::install_github("stemangiola/ppcseq")
 You can get the test dataset with
 
 ``` r
-ppcseq::counts 
+data("counts")
+counts 
 ```
 
     ## # A tibble: 394,821 x 9
@@ -51,8 +52,8 @@ anundance analysis, due to outliers.
 
 
 counts.ppc = 
-    ppcseq::counts %>%
-    mutate(is_significant = FDR < 0.01) %>%
+    counts |>
+    mutate(is_significant = FDR < 0.01) |>
     identify_outliers(
         formula = ~ Label,
         .sample = sample, 
@@ -72,23 +73,23 @@ counts.ppc
 ```
 
     ## # A tibble: 15 x 4
-    ##    symbol   `sample wise data` `ppc samples failed` `tot deleterious outliers`
-    ##    <chr>    <list>                            <int>                      <int>
-    ##  1 SLC16A12 <tibble [21 × 14]>                    0                          0
-    ##  2 CYP1A1   <tibble [21 × 14]>                    1                          1
-    ##  3 ART3     <tibble [21 × 14]>                    0                          0
-    ##  4 DIO2     <tibble [21 × 14]>                    0                          0
-    ##  5 OR51E2   <tibble [21 × 14]>                    0                          0
-    ##  6 MUC16    <tibble [21 × 14]>                    0                          0
-    ##  7 CCNA1    <tibble [21 × 14]>                    0                          0
-    ##  8 LYZ      <tibble [21 × 14]>                    1                          1
-    ##  9 PPM1H    <tibble [21 × 14]>                    0                          0
-    ## 10 SUSD5    <tibble [21 × 14]>                    0                          0
-    ## 11 TPRG1    <tibble [21 × 14]>                    0                          0
-    ## 12 EPB42    <tibble [21 × 14]>                    0                          0
-    ## 13 LRRC38   <tibble [21 × 14]>                    0                          0
-    ## 14 SUSD4    <tibble [21 × 14]>                    0                          0
-    ## 15 MMP8     <tibble [21 × 14]>                    0                          0
+    ##    symbol   sample_wise_data   ppc_samples_failed tot_deleterious_outliers
+    ##    <chr>    <list>                          <int>                    <int>
+    ##  1 SLC16A12 <tibble [21 × 12]>                  0                        0
+    ##  2 CYP1A1   <tibble [21 × 12]>                  1                        1
+    ##  3 ART3     <tibble [21 × 12]>                  0                        0
+    ##  4 DIO2     <tibble [21 × 12]>                  0                        0
+    ##  5 OR51E2   <tibble [21 × 12]>                  0                        0
+    ##  6 MUC16    <tibble [21 × 12]>                  0                        0
+    ##  7 CCNA1    <tibble [21 × 12]>                  0                        0
+    ##  8 LYZ      <tibble [21 × 12]>                  1                        1
+    ##  9 PPM1H    <tibble [21 × 12]>                  0                        0
+    ## 10 SUSD5    <tibble [21 × 12]>                  0                        0
+    ## 11 TPRG1    <tibble [21 × 12]>                  0                        0
+    ## 12 EPB42    <tibble [21 × 12]>                  0                        0
+    ## 13 LRRC38   <tibble [21 × 12]>                  0                        0
+    ## 14 SUSD4    <tibble [21 × 12]>                  0                        0
+    ## 15 MMP8     <tibble [21 × 12]>                  0                        0
 
 The new data frame contains plots for each gene
 
@@ -96,14 +97,14 @@ We can visualise the top five differentially transcribed genes
 
 ``` r
 counts.ppc_plots = 
-    counts.ppc %>% 
+    counts.ppc |> 
     plot_credible_intervals() 
 ```
 
 ``` r
-counts.ppc_plots %>%
-    pull(plot) %>% 
-    .[1:2]
+counts.ppc_plots |>
+    pull(plot) |> 
+    head(2)
 ```
 
     ## [[1]]
diff --git a/inst/stan/negBinomial_MPI.stan b/inst/stan/negBinomial_MPI.stan
index e412161..f54e293 100644
--- a/inst/stan/negBinomial_MPI.stan
+++ b/inst/stan/negBinomial_MPI.stan
@@ -165,8 +165,7 @@ data {
 	int<lower=0> how_many_to_check;
 
 	// For better adaptation
-	real exposure_rate_multiplier;
-	real intercept_shift_scale[2];
+  vector[S] exposure_rate;
 
 	// Truncation
 	real<lower=0> truncation_compensation;
@@ -185,8 +184,6 @@ parameters {
   real<lower=0> lambda_sigma;
   real lambda_skew;
 
-  vector<multiplier = exposure_rate_multiplier>[S] exposure_rate;
-
   // Gene-wise properties of the data
   row_vector[G] intercept;
   row_vector[how_many_to_check] alpha_sub_1;
@@ -225,10 +222,6 @@ model {
 
   sigma_raw ~ normal(sigma_slope * intercept + sigma_intercept,sigma_sigma);
 
-  // Exposure prior
-  exposure_rate ~ normal(0,1);
-  sum(exposure_rate) ~ normal(0, 0.001 * S);
-
 	//Gene-wise properties of the data
 	target += sum(map_rect(
 		lp_reduce ,
diff --git a/man/add_scaled_counts_bulk.calcNormFactor.Rd b/man/add_scaled_counts_bulk.calcNormFactor.Rd
deleted file mode 100644
index e3f6318..0000000
--- a/man/add_scaled_counts_bulk.calcNormFactor.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tidybulk.R
-\name{add_scaled_counts_bulk.calcNormFactor}
-\alias{add_scaled_counts_bulk.calcNormFactor}
-\title{Calculate the norm factor with calcNormFactor from limma}
-\usage{
-add_scaled_counts_bulk.calcNormFactor(
-  .data,
-  reference = NULL,
-  .sample = sample,
-  .transcript = transcript,
-  .abundance = count,
-  method
-)
-}
-\arguments{
-\item{.data}{A tibble}
-
-\item{reference}{A reference matrix, not sure if used anymore}
-
-\item{.sample}{The name of the sample column}
-
-\item{.transcript}{The name of the transcript/gene column}
-
-\item{.abundance}{The name of the transcript/gene abundance column}
-
-\item{method}{A string character. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")}
-}
-\value{
-A list including the filtered data frame and the normalization factors
-}
-\description{
-Calculate the norm factor with calcNormFactor from limma
-}
-\keyword{internal}
diff --git a/man/add_scaled_counts_bulk.get_low_expressed.Rd b/man/add_scaled_counts_bulk.get_low_expressed.Rd
deleted file mode 100644
index 41e3012..0000000
--- a/man/add_scaled_counts_bulk.get_low_expressed.Rd
+++ /dev/null
@@ -1,38 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tidybulk.R
-\name{add_scaled_counts_bulk.get_low_expressed}
-\alias{add_scaled_counts_bulk.get_low_expressed}
-\title{Drop lowly transcribed genes for TMM normalization}
-\usage{
-add_scaled_counts_bulk.get_low_expressed(
-  .data,
-  .sample = sample,
-  .transcript = transcript,
-  .abundance = count,
-  factor_of_interest = NULL,
-  minimum_counts = 10,
-  minimum_proportion = 0.7
-)
-}
-\arguments{
-\item{.data}{A tibble}
-
-\item{.sample}{The name of the sample column}
-
-\item{.transcript}{The name of the transcript/gene column}
-
-\item{.abundance}{The name of the transcript/gene abundance column}
-
-\item{factor_of_interest}{The name of the column of the factor of interest}
-
-\item{minimum_counts}{A positive integer. Minimum counts required for at least some samples.}
-
-\item{minimum_proportion}{A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure.}
-}
-\value{
-A tibble filtered
-}
-\description{
-Drop lowly transcribed genes for TMM normalization
-}
-\keyword{internal}
diff --git a/man/figures/unnamed-chunk-9-1.png b/man/figures/unnamed-chunk-9-1.png
index 7360caf..71e5d5e 100644
Binary files a/man/figures/unnamed-chunk-9-1.png and b/man/figures/unnamed-chunk-9-1.png differ
diff --git a/man/figures/unnamed-chunk-9-2.png b/man/figures/unnamed-chunk-9-2.png
index 72f4c3e..5169cae 100644
Binary files a/man/figures/unnamed-chunk-9-2.png and b/man/figures/unnamed-chunk-9-2.png differ
diff --git a/man/get_scaled_counts_bulk.Rd b/man/get_scaled_counts_bulk.Rd
deleted file mode 100644
index 1ca6abb..0000000
--- a/man/get_scaled_counts_bulk.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tidybulk.R
-\name{get_scaled_counts_bulk}
-\alias{get_scaled_counts_bulk}
-\title{Get a tibble with scaled counts using TMM}
-\usage{
-get_scaled_counts_bulk(
-  .data,
-  .sample = NULL,
-  .transcript = NULL,
-  .abundance = NULL,
-  method = "TMM",
-  reference_sample = NULL
-)
-}
-\arguments{
-\item{.data}{A tibble}
-
-\item{.sample}{The name of the sample column}
-
-\item{.transcript}{The name of the transcript/gene column}
-
-\item{.abundance}{The name of the transcript/gene abundance column}
-
-\item{method}{A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")}
-
-\item{reference_sample}{A character string. The name of the reference sample. If NULL the sample with highest total read count will be selected as reference.}
-}
-\value{
-A tibble including additional columns
-}
-\description{
-Get a tibble with scaled counts using TMM
-}
-\keyword{internal}
diff --git a/man/identify_outliers.Rd b/man/identify_outliers.Rd
index 2b9a860..bc136b8 100644
--- a/man/identify_outliers.Rd
+++ b/man/identify_outliers.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ppcseq.R
+% Please edit documentation in R/methods.R
 \name{identify_outliers}
 \alias{identify_outliers}
 \title{identify_outliers main}
@@ -12,6 +12,7 @@ identify_outliers(
   .abundance,
   .significance,
   .do_check,
+  .scaling_factor = NULL,
   percent_false_positive_genes = 1,
   how_many_negative_controls = 500,
   approximate_posterior_inference = TRUE,
@@ -43,6 +44,8 @@ identify_outliers(
 
 \item{.do_check}{A column name as symbol. A column with a boolean indicating whether a transcript was identified as differentially abundant}
 
+\item{.scaling_factor}{In case the scaling factor must not be calculated (TMM method) using the input data but provided. It is useful, for example, for pseudobulk single-cell where the scaling might depend on sample sequencing depth for all cells rather than a particular cell type.}
+
 \item{percent_false_positive_genes}{A real between 0 and 100. It is the aimed percent of transcript being a false positive. For example, percent_false_positive_genes = 1 provide 1 percent of the calls for outlier containing transcripts that has actually not outliers.}
 
 \item{how_many_negative_controls}{An integer. How many transcript from the bottom non-significant should be taken for inferring the mean-overdispersion trend.}
@@ -72,7 +75,7 @@ identify_outliers(
 \item{adj_prob_theshold_2}{A boolean. Used for development and testing purposes}
 }
 \value{
-A nested tibble \code{tbl} with transcript-wise information: \verb{sample wise data} | plot | \verb{ppc samples failed} | \verb{tot deleterious outliers}
+A nested tibble \code{tbl} with transcript-wise information: \code{sample_wise_data} | plot | \verb{ppc samples failed} | \verb{tot deleterious_outliers}
 }
 \description{
 This function runs the data modeling and statistical test for the hypothesis that a transcript includes outlier biological replicate.
diff --git a/man/plot_credible_intervals.Rd b/man/plot_credible_intervals.Rd
index 2dcf1eb..7f0d5a3 100644
--- a/man/plot_credible_intervals.Rd
+++ b/man/plot_credible_intervals.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ppcseq.R
+% Please edit documentation in R/methods.R
 \name{plot_credible_intervals}
 \alias{plot_credible_intervals}
 \title{plot_credible interval for theoretical data distributions}
diff --git a/tests/testthat/test-ppcSeq.R b/tests/testthat/test-ppcSeq.R
index 8721f95..4cf0a5f 100644
--- a/tests/testthat/test-ppcSeq.R
+++ b/tests/testthat/test-ppcSeq.R
@@ -7,8 +7,8 @@ if(Sys.info()[['sysname']] == "Linux"){
 test_that("VB post approx no correction",{
 
   res =
-    counts %>%
-    dplyr::mutate(  is_significant = ifelse(symbol %in% c("SLC16A12", "CYP1A1", "ART3"), TRUE, FALSE) ) %>%
+    counts |>
+    dplyr::mutate(  is_significant = ifelse(symbol %in% c("SLC16A12", "CYP1A1", "ART3"), TRUE, FALSE) ) |>
     ppcseq::identify_outliers(
       formula = ~ Label,
       sample, symbol, value,
@@ -19,7 +19,8 @@ test_that("VB post approx no correction",{
       approximate_posterior_inference =TRUE,
       approximate_posterior_analysis =TRUE,
       how_many_negative_controls = 50,
-      cores=1,pass_fit = TRUE
+      cores=1,
+      pass_fit = TRUE
     )
 
   expect_equal(