From f5f1e388c3172572cfb11e6a2ae14212ac1778eb Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 12 Aug 2022 14:35:03 +0100 Subject: [PATCH] modified ks test to output all stats needed in one script --- scripts/ks_test_all_PS.R | 401 +++++++++++++----- .../plotting_thesis/corr_plots_thesis.R | 3 +- .../stats/embb_ks_lineage_all_comp.csv | 11 + 3 files changed, 309 insertions(+), 106 deletions(-) create mode 100644 scripts/plotting/plotting_thesis/stats/embb_ks_lineage_all_comp.csv diff --git a/scripts/ks_test_all_PS.R b/scripts/ks_test_all_PS.R index c02d67a..65a72c9 100755 --- a/scripts/ks_test_all_PS.R +++ b/scripts/ks_test_all_PS.R @@ -2,55 +2,26 @@ ######################################################### # TASK: KS test for PS/DUET lineage distributions #======================================================================= -#======================================================================= -# working dir and loading libraries -getwd() -setwd("~/git/LSHTM_analysis/scripts/") -getwd() - -#source("~/git/LSHTM_analysis/scripts/Header_TT.R") -#source("../barplot_colour_function.R") -#require(data.table) -source("plotting/combining_dfs_plotting.R") -# should return the following dfs, directories and variables - -# PS combined: -# 1) merged_df2 -# 2) merged_df2_comp -# 3) merged_df3 -# 4) merged_df3_comp - -# LIG combined: -# 5) merged_df2_lig -# 6) merged_df2_comp_lig -# 7) merged_df3_lig -# 8) merged_df3_comp_lig - -# 9) my_df_u -# 10) my_df_u_lig - -cat(paste0("Directories imported:" - , "\ndatadir:", datadir - , "\nindir:", indir - , "\noutdir:", outdir - , "\nplotdir:", plotdir)) - -cat(paste0("Variables imported:" - , "\ndrug:", drug - , "\ngene:", gene - , "\ngene_match:", gene_match - , "\nAngstrom symbol:", angstroms_symbol - , "\nNo. of duplicated muts:", dup_muts_nu - , "\nNA count for ORs:", na_count - , "\nNA count in df2:", na_count_df2 - , "\nNA count in df3:", na_count_df3)) +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/alr.R") +source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/rpob.R") +# get plottting dfs +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") #============= # Output #============= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +outdir_stats = "~/git/LSHTM_analysis/scripts/plotting/plotting_thesis/stats/" + # ks test by lineage -ks_lineage = paste0(outdir, "/KS_lineage_all_muts.csv") +#ks_lineage = paste0(outdir, "/KS_lineage_all_muts.csv") ########################### # Data for stats @@ -81,14 +52,19 @@ str(my_df) table(my_df$lineage); str(my_df$lineage) # subset only lineages1-4 -sel_lineages = c("lineage1" - , "lineage2" - , "lineage3" - , "lineage4") +sel_lineages = c("L1" + , "L2" + , "L3" + , "L4") # subset selected lineages df_lin = subset(my_df, subset = lineage %in% sel_lineages) +table(df_lin$lineage) +table(df_lin$sensitivity) + +table(df_lin$lineage, df_lin$sensitivity) + #============== # dr_muts_col #============== @@ -106,16 +82,16 @@ df_lin = subset(my_df, subset = lineage %in% sel_lineages) #======================================================================= # individual: CHECKS -lin1 = df_lin[df_lin$lineage == "lineage1",]$duet_scaled -lin2 = df_lin[df_lin$lineage == "lineage2",]$duet_scaled -lin3 = df_lin[df_lin$lineage == "lineage3",]$duet_scaled -lin4 = df_lin[df_lin$lineage == "lineage4",]$duet_scaled +lin1 = df_lin[df_lin$lineage == "L1",]$avg_stability_scaled +lin2 = df_lin[df_lin$lineage == "L2",]$avg_stability_scaled +lin3 = df_lin[df_lin$lineage == "L3",]$avg_stability_scaled +lin4 = df_lin[df_lin$lineage == "L4",]$avg_stability_scaled ks.test(lin1, lin4) -ks.test(df_lin$duet_scaled[df_lin$lineage == "lineage1"] - , df_lin$duet_scaled[df_lin$lineage == "lineage2"]) +ks.test(df_lin$avg_stability_scaled[df_lin$lineage == "L2"] + , df_lin$avg_stability_scaled[df_lin$lineage == "L3"]) #======================================================================= my_lineages = levels(factor(df_lin$lineage)); my_lineages @@ -124,31 +100,43 @@ my_lineages = levels(factor(df_lin$lineage)); my_lineages #===================== # Lineage 1 comparisons #===================== -my_lin1 = "lineage1" -#my_lineages_comp_l1 = c("lineage2", "lineage3", "lineage4") +my_lin1 = "L1" +#my_lineages_comp_l1 = c("L2", "L3", "L4") my_lineages_comp_l1 = my_lineages[-match(my_lin1, my_lineages)] ks_df_l1 = data.frame() for (i in my_lineages_comp_l1){ cat(i) - l1_df = data.frame(group = NA, method = NA, ks_statistic = NA, ks_pvalue = NA) + l1_df = data.frame(comparison = NA, method = NA, ks_statistic = NA, ks_pvalue = NA + , n_samples = NA + , n_samples_total = NA) lineage_comp = paste0(my_lin1, " vs ", i) - ks_method = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin1] - , df_lin$duet_scaled[df_lin$lineage == i])$method + n_samples_lin = nrow(df_lin[df_lin$lineage == my_lin1,]) + n_samples_i = nrow(df_lin[df_lin$lineage == i,]) + n_samples_all = paste0(my_lin1, "(", n_samples_lin, ")" + , ", " + , i, "(", n_samples_i, ")") - ks_statistic = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin1] - , df_lin$duet_scaled[df_lin$lineage == i])$statistic + n_samples_total = n_samples_lin + n_samples_i + + ks_method = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin1] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$method - ks_pvalue = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin1] - , df_lin$duet_scaled[df_lin$lineage == i])$p.value + ks_statistic = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin1] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$statistic + + ks_pvalue = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin1] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$p.value # print(c(lineage_comp, ks_method, ks_statistic[[1]], ks_pval)) - l1_df$group = lineage_comp - l1_df$method = ks_method - l1_df$ks_statistic = ks_statistic[[1]] - l1_df$ks_pvalue = ks_pvalue + l1_df$comparison = lineage_comp + l1_df$method = ks_method + l1_df$ks_statistic = ks_statistic[[1]] + l1_df$ks_pvalue = ks_pvalue + l1_df$n_samples = n_samples_all + l1_df$n_samples_total= n_samples_total print(l1_df) ks_df_l1 = rbind(ks_df_l1,l1_df) @@ -159,31 +147,43 @@ for (i in my_lineages_comp_l1){ #===================== # Lineage 2 comparisons #===================== -my_lin2 = "lineage2" -#my_lineages_comp_l2 = c("lineage1", lineage3", "lineage4") +my_lin2 = "L2" +#my_lineages_comp_l2 = c("L1", lineage3", "L4") my_lineages_comp_l2 = my_lineages[-match(my_lin2, my_lineages)] ks_df_l2 = data.frame() for (i in my_lineages_comp_l2){ cat(i) - l2_df = data.frame(group = NA, method = NA, ks_statistic = NA, ks_pvalue = NA) + l2_df = data.frame(comparison = NA, method = NA, ks_statistic = NA, ks_pvalue = NA + , n_samples = NA + , n_samples_total = NA) lineage_comp = paste0(my_lin2, " vs ", i) - ks_method = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin2] - , df_lin$duet_scaled[df_lin$lineage == i])$method + n_samples_lin = nrow(df_lin[df_lin$lineage == my_lin2,]) + n_samples_i = nrow(df_lin[df_lin$lineage == i,]) + n_samples_all = paste0(my_lin2, "(", n_samples_lin, ")" + , ", " + , i, "(", n_samples_i, ")") - ks_statistic = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin2] - , df_lin$duet_scaled[df_lin$lineage == i])$statistic + n_samples_total = n_samples_lin + n_samples_i - ks_pvalue = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin2] - , df_lin$duet_scaled[df_lin$lineage == i])$p.value + ks_method = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin2] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$method + + ks_statistic = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin2] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$statistic + + ks_pvalue = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin2] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$p.value # print(c(lineage_comp, ks_method, ks_statistic[[1]], ks_pval)) - l2_df$group = lineage_comp - l2_df$method = ks_method - l2_df$ks_statistic = ks_statistic[[1]] - l2_df$ks_pvalue = ks_pvalue + l2_df$comparison = lineage_comp + l2_df$method = ks_method + l2_df$ks_statistic = ks_statistic[[1]] + l2_df$ks_pvalue = ks_pvalue + l2_df$n_samples = n_samples_all + l2_df$n_samples_total = n_samples_total print(l2_df) ks_df_l2 = rbind(ks_df_l2, l2_df) @@ -193,38 +193,52 @@ for (i in my_lineages_comp_l2){ #===================== # Lineage 3 comparisons #===================== -my_lin3 = "lineage3" -#my_lineages_comp_l3 = c("lineage1", lineage2", "lineage4") +my_lin3 = "L3" +#my_lineages_comp_l3 = c("L1", lineage2", "L4") my_lineages_comp_l3 = my_lineages[-match(my_lin3, my_lineages)] ks_df_l3 = data.frame() for (i in my_lineages_comp_l3){ cat(i) - l3_df = data.frame(group = NA, method = NA, ks_statistic = NA, ks_pvalue = NA) + l3_df = data.frame(comparison = NA, method = NA, ks_statistic = NA, ks_pvalue = NA + , n_samples = NA + , n_samples_total = NA) lineage_comp = paste0(my_lin3, " vs ", i) - ks_method = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin3] - , df_lin$duet_scaled[df_lin$lineage == i])$method + n_samples_lin = nrow(df_lin[df_lin$lineage == my_lin3,]) + n_samples_i = nrow(df_lin[df_lin$lineage == i,]) + n_samples_all = paste0(my_lin3, "(", n_samples_lin, ")" + , ", " + , i, "(", n_samples_i, ")") + n_samples_total = n_samples_lin + n_samples_i - ks_statistic = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin3] - , df_lin$duet_scaled[df_lin$lineage == i])$statistic + ks_method = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin3] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$method - ks_pvalue = ks.test(df_lin$duet_scaled[df_lin$lineage == my_lin3] - , df_lin$duet_scaled[df_lin$lineage == i])$p.value + ks_statistic = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin3] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$statistic + + ks_pvalue = ks.test(df_lin$avg_stability_scaled[df_lin$lineage == my_lin3] + , df_lin$avg_stability_scaled[df_lin$lineage == i])$p.value # print(c(lineage_comp, ks_method, ks_statistic[[1]], ks_pval)) - l3_df$group = lineage_comp - l3_df$method = ks_method - l3_df$ks_statistic = ks_statistic[[1]] - l3_df$ks_pvalue = ks_pvalue + l3_df$comparison = lineage_comp + l3_df$method = ks_method + l3_df$ks_statistic = ks_statistic[[1]] + l3_df$ks_pvalue = ks_pvalue + l3_df$n_samples = n_samples_all + l3_df$n_samples_total = n_samples_total print(l3_df) ks_df_l3 = rbind(ks_df_l3, l3_df) } +###################################################################################### + + #################################################################### -# combine all three ks_dfs +# combine all 4 ks_dfs n_dfs = 3 if ( all.equal(nrow(ks_df_l1), nrow(ks_df_l2), nrow(ks_df_l3)) && all.equal(ncol(ks_df_l1), ncol(ks_df_l2), ncol(ks_df_l3)) ){ @@ -249,11 +263,11 @@ if ( all.equal(nrow(ks_df_l1), nrow(ks_df_l2), nrow(ks_df_l3)) && , "\nCheck hardcoded value of n_dfs") } -#======================================================================= +#-------------- # formatting -#======================================================================= +#-------------- # add total_n number -ks_df_combined$total_samples_analysed = nrow(df_lin) +#ks_df_combined$total_samples_analysed = nrow(df_lin) # adding pvalue_signif ks_df_combined$pvalue_signif = ks_df_combined$ks_pvalue @@ -268,13 +282,192 @@ ks_df_combined = dplyr::mutate(ks_df_combined , TRUE ~ 'ns')) # Remove duplicates -rows_to_remove = c("lineage2 vs lineage1", "lineage3 vs lineage1", "lineage3 vs lineage2") +rows_to_remove = c("L2 vs L1", "L3 vs L1", "L3 vs L2") -ks_df_combined_f = ks_df_combined[-match(rows_to_remove, ks_df_combined$group),] +ks_df_combined_f = ks_df_combined[-match(rows_to_remove, ks_df_combined$comparison),] +################################################################################# +#================================ +# R vs S distribution: Overall +#================================= +df_lin_R = df_lin[df_lin$sensitivity == "R",] +df_lin_S = df_lin[df_lin$sensitivity == "S",] -#======================================================================= -#****************** -# write output file: KS test -#****************** -cat("Output of KS test bt lineage:", ks_lineage) -write.csv(ks_df_combined_f, ks_lineage, row.names = FALSE) +overall_RS_df = data.frame(comparison = NA, method = NA, ks_statistic = NA, ks_pvalue = NA + , n_samples = NA + , n_samples_total = NA) + +comp_type = "overall R vs S distributions" + +n_samples_R = nrow(df_lin_R) +n_samples_S = nrow(df_lin_S) +n_samples_all = paste0("R(n=", n_samples_R, ")" , " ", "S(n=", n_samples_S, ")") +n_samples_total = n_samples_R+n_samples_S + +ks.test(df_lin_R$avg_stability_scaled + , df_lin_S$avg_stability_scaled) + +ks_method = ks.test(df_lin_R$avg_stability_scaled + , df_lin_S$avg_stability_scaled)$method + +ks_statistic = ks.test(df_lin_R$avg_stability_scaled + , df_lin_S$avg_stability_scaled)$statistic + +ks_pvalue = ks.test(df_lin_R$avg_stability_scaled + , df_lin_S$avg_stability_scaled)$p.value + +overall_RS_df$comparison = comp_type +overall_RS_df$method = ks_method +overall_RS_df$ks_statistic = ks_statistic +overall_RS_df$ks_pvalue = ks_pvalue +overall_RS_df$n_samples = n_samples_all +overall_RS_df$n_samples_total= n_samples_total + +#-------------- +# formatting +#-------------- +overall_RS_df$pvalue_signif = overall_RS_df$ks_pvalue +overall_RS_df = dplyr::mutate(overall_RS_df + , pvalue_signif = case_when(pvalue_signif == 0.05 ~ "." + , pvalue_signif <=0.0001 ~ '****' + , pvalue_signif <=0.001 ~ '***' + , pvalue_signif <=0.01 ~ '**' + , pvalue_signif <0.05 ~ '*' + , TRUE ~ 'ns')) + +overall_RS_df +##################################################################### +if (all(colnames(ks_df_combined_f) == colnames(overall_RS_df))){ + + cat("\nPASS:combining KS test results") +}else{ + stop("\nAbort: Cannot combine results for KS test") +} + +ks_df_combined_f2 = rbind(ks_df_combined_f, overall_RS_df) + +# ADD extra cols +ks_df_combined_f2$ks_comp_type = "between_lineages" +ks_df_combined_f2$gene_name = tolower(gene) + +# #============================== +# # write output file: KS test +# #=============================== +# Out_lineage_bwL = paste0(outdir_stats +# , tolower(gene) +# , "_ks_lineage_bw.csv") +# +# cat("Output of KS test bt lineage:", Out_lineage_bwL) +# write.csv(ks_df_combined_f2, Out_lineage_bwL, row.names = FALSE) + +########################################################################### +#======================= +# Within lineage R vs S: MANUAL +#======================= +lin1 = df_lin[df_lin$lineage == my_lin1,] +ks.test(lin1$avg_stability_scaled[lin1$sensitivity == "R"] + , lin1$avg_stability_scaled[lin1$sensitivity == "S"]) + +lin2 = df_lin[df_lin$lineage == my_lin2,] +ks.test(lin2$avg_stability_scaled[lin2$sensitivity == "R"] + , lin2$avg_stability_scaled[lin2$sensitivity == "S"]) + + +lin3 = df_lin[df_lin$lineage == "L3",] +ks.test(lin3$avg_stability_scaled[lin3$sensitivity == "R"] + , lin3$avg_stability_scaled[lin3$sensitivity == "S"]) + +lin4 = df_lin[df_lin$lineage == "L4",] +ks.test(lin4$avg_stability_scaled[lin4$sensitivity == "R"] + , lin4$avg_stability_scaled[lin4$sensitivity == "S"]) +################################################################### +#======================= +# Within lineage R vs S: LOOP +#======================= +all_within_lin_df = data.frame() + +for (i in c(my_lin1 + , my_lin2 + , my_lin3 + )){ + #cat(i) + + within_lin_df = data.frame(comparison = NA, method = NA, ks_statistic = NA, ks_pvalue = NA + , n_samples = NA + , n_samples_total = NA) + + within_lineage_comp = paste0(i, ": R vs S") + my_lin_df = df_lin[df_lin$lineage == i,] + + lin_R_n = nrow(my_lin_df[my_lin_df$sensitivity == "R",]) + lin_S_n = nrow(my_lin_df[my_lin_df$sensitivity == "S",]) + lin_total_n = lin_R_n+lin_S_n + n_samples_all = paste0("R(n=", lin_R_n, ")" , ", ", "S(n=", lin_S_n, ")") + + ks_method = ks.test(my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "R"] + , my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "S"])$method + + ks_statistic = ks.test(my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "R"] + , my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "S"])$statistic + + ks_pvalue =ks.test(my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "R"] + , my_lin_df$avg_stability_scaled[my_lin_df$sensitivity == "S"])$p.value + + # print(c(lineage_comp, ks_method, ks_statistic[[1]], ks_pval)) + within_lin_df$comparison = within_lineage_comp + within_lin_df$method = ks_method + within_lin_df$ks_statistic = ks_statistic[[1]] + within_lin_df$ks_pvalue = ks_pvalue + within_lin_df$n_samples = n_samples_all + within_lin_df$n_samples_total=lin_total_n + + print(my_lin_df) + all_within_lin_df = rbind(all_within_lin_df, within_lin_df) + +} +all_within_lin_df + +all_within_lin_df$pvalue_signif = all_within_lin_df$ks_pvalue +str(all_within_lin_df$pvalue_signif) + +all_within_lin_df = dplyr::mutate(all_within_lin_df + , pvalue_signif = case_when(pvalue_signif == 0.05 ~ "." + , pvalue_signif <=0.0001 ~ '****' + , pvalue_signif <=0.001 ~ '***' + , pvalue_signif <=0.01 ~ '**' + , pvalue_signif <0.05 ~ '*' + , TRUE ~ 'ns')) + +all_within_lin_df + +# ADD extra cols +all_within_lin_df$ks_comp_type = "within_lineages" +all_within_lin_df$gene_name = tolower(gene) + +# #-------------------- +# # write output file: KS test within grpup +# #---------------------- +# Out_ks_withinL = paste0(outdir_stats +# , tolower(gene) +# , "_ks_lineage_within.csv") +# cat("Output of KS test within lineage:",Out_ks_withinL ) +# write.csv(all_within_lin_df, Out_ks_withinL, row.names = FALSE) + +################################################################## + +if (all(colnames(ks_df_combined_f2) == colnames(Out_ks_withinL))){ + + cat("\nPASS:combining KS test results") +}else{ + stop("\nAbort: Cannot combine results for KS test") +} + +ks_df_combined_all = rbind(ks_df_combined_f2, all_within_lin_df) + +#-------------------- +# write output file: KS test within grpup +#---------------------- +Out_ks_all = paste0(outdir_stats + , tolower(gene) + , "_ks_lineage_all_comp.csv") +cat("Output of KS test all comparisons:", Out_ks_all ) +write.csv(ks_df_combined_all, Out_ks_all, row.names = FALSE) \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis.R b/scripts/plotting/plotting_thesis/corr_plots_thesis.R index 551fee3..2681ecb 100644 --- a/scripts/plotting/plotting_thesis/corr_plots_thesis.R +++ b/scripts/plotting/plotting_thesis/corr_plots_thesis.R @@ -321,14 +321,13 @@ svg('/tmp/foo.svg', width=10, height=10, ) corr_plotting_df = corr_df_ps -ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-corr_end) +ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1) , upper = list(continuous = wrap('cor', method = "spearman")) , aes(colour = factor(dst_mode), alpha = 0.5) , title="correlogram with ggpairs()") + scale_colour_manual(values = c("red", "blue")) + scale_fill_manual(values = c("red", "blue")) - dev.off() diff --git a/scripts/plotting/plotting_thesis/stats/embb_ks_lineage_all_comp.csv b/scripts/plotting/plotting_thesis/stats/embb_ks_lineage_all_comp.csv new file mode 100644 index 0000000..cecdadd --- /dev/null +++ b/scripts/plotting/plotting_thesis/stats/embb_ks_lineage_all_comp.csv @@ -0,0 +1,11 @@ +"comparison","method","ks_statistic","ks_pvalue","n_samples","n_samples_total","pvalue_signif","ks_comp_type","gene_name" +"L1 vs L2","Two-sample Kolmogorov-Smirnov test",0.562081297123684,0,"L1(4013), L2(3554)",7567,"****","between_lineages","embb" +"L1 vs L3","Two-sample Kolmogorov-Smirnov test",0.521960420540757,0,"L1(4013), L3(692)",4705,"****","between_lineages","embb" +"L1 vs L4","Two-sample Kolmogorov-Smirnov test",0.414409376879199,0,"L1(4013), L4(5276)",9289,"****","between_lineages","embb" +"L2 vs L3","Two-sample Kolmogorov-Smirnov test",0.191637851675715,0,"L2(3554), L3(692)",4246,"****","between_lineages","embb" +"L2 vs L4","Two-sample Kolmogorov-Smirnov test",0.177978192411417,0,"L2(3554), L4(5276)",8830,"****","between_lineages","embb" +"L3 vs L4","Two-sample Kolmogorov-Smirnov test",0.110182657206589,7.08096017931759e-07,"L3(692), L4(5276)",5968,"****","between_lineages","embb" +"overall R vs S distributions","Two-sample Kolmogorov-Smirnov test",0.549577339308325,0,"R(n=6878) S(n=6657)",13535,"****","between_lineages","embb" +"L1: R vs S","Two-sample Kolmogorov-Smirnov test",0.654874747810162,0,"R(n=209), S(n=3804)",4013,"****","within_lineages","embb" +"L2: R vs S","Two-sample Kolmogorov-Smirnov test",0.368060163680602,0,"R(n=3014), S(n=540)",3554,"****","within_lineages","embb" +"L3: R vs S","Two-sample Kolmogorov-Smirnov test",0.473750449478605,0,"R(n=486), S(n=206)",692,"****","within_lineages","embb"