From 69a0da0a5957937bae64ba6242fa68b31fc79e34 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sun, 4 Sep 2022 21:36:49 +0100 Subject: [PATCH] added script for lineage diff sensitivities --- .../embb/lineage_diff_sensitivities.R | 166 ++++++++++++++++++ .../gid/lineage_diff_sensitivities.R | 134 ++++++++++++++ .../katg/lineage_diff_sensitivities.R | 133 ++++++++++++++ .../rpob/lineage_diff_sensitivities.R | 133 ++++++++++++++ 4 files changed, 566 insertions(+) create mode 100644 scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R create mode 100644 scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R create mode 100644 scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R create mode 100644 scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R diff --git a/scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R new file mode 100644 index 0000000..d7e1833 --- /dev/null +++ b/scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R @@ -0,0 +1,166 @@ +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/embb.R") +source("~/git/LSHTM_analysis/config/gid.R") + +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + + +# Now we need to make a column that fill na in dst with value of dst_mode +df2 = merged_df2 +#table(df2$dst2) + +df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) +df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") +table(df2$sens2) + + +all_snps = unique(df2$mutationinformation) +all_snps_n = length(all_snps); all_snps_n + +all_samples_id = unique(df2$id) # different to nrows +all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows + +sel_lineage = c("L1", "L2", "L3", "L4") +df2_lin = df2[df2$lineage%in%sel_lineage,] +sel_lin_snps = unique(df2_lin$mutationinformation) +sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n + +sel_lin_samples_id = unique(df2_lin$id) +sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n + +# are the snps that are not in L1-L4 unique to L5-L7 +left_snps = all_snps[!all_snps%in%sel_lin_snps] +left_snps_n = length(left_snps); left_snps_n + +if (all_snps_n == sel_lin_snps_n+left_snps_n){ + cat("PASS: left snps extracted for gene", tolower(gene)) +}else{ + stop("Abort: left snps count mismatch") +} + +left_snps +lef_snps_df = df2[df2$mutationinformation%in%left_snps,] +table(lef_snps_df$lineage) + +################################## +# selected lineage plos +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + +#----------------------------------------------- +# step 0: Subset a smaller df +#----------------------------------------------- +plot_df_gene = df2_lin[, cols_to_subset] + +#----------------------------------------------- +# step 1: Select muts for each target +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) +# katg +#sel_mutsP = c("") +# rpob +#sel_mutsP = c("") +#----------------------------------------------- +# step 2: Subset data with just those genes +#----------------------------------------------- +plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] +cat("\nnrow of plot_df:", nrow(plot_df_gene)) +table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) +#----------------------------------------------- +# step 3: Assign to plot_df +#----------------------------------------------- + +plot_df = plot_df_gene + +#----------------------------------------------- +# step 4: Add p-value +#----------------------------------------------- +plot_df$pval = NULL +for (i in sel_mutsP) { + #print (i) + s_mut = plot_df[plot_df$mutationinformation == i,] + #print(s_mut) + s_tab = table(s_mut$lineage, s_mut$sens2) + #print(s_tab) + #ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3) + ft_pvalue_i = fisher.test(s_tab + #, workspace=2e9 + #, simulate.p.value=TRUE,B=1e7 + )$p.value + #print(ft_pvalue_i) + plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i + #print(s_tab) +} + +plot_df$pvalR = round(plot_df$pval, 3) + +# round P-values +plot_df$pvalRF = plot_df$pvalR +plot_df = dplyr::mutate(plot_df + , pvalRF = case_when(pvalRF == 0.05 ~ "P ." + , pvalRF <=0.0001 ~ 'P ****' + , pvalRF <=0.001 ~ 'P ***' + , pvalRF <=0.01 ~ 'P **' + , pvalRF <0.05 ~ 'P *' + , TRUE ~ 'ns')) + +plot_df + +head(plot_df) +table(plot_df$pvalR<0.05) + +#----------------------------------------------- +# step 5: Plot +#----------------------------------------------- +p_title = gene +ts = 8 +gls = 3 +DSplot = ggplot(plot_df, aes(x = lineage, + fill = sens2)) + + geom_bar(stat = 'count') + + scale_fill_manual(name = "" + # name = leg_title + , values = c("red", "blue") + #, labels = levels(sens2)) + )+ + facet_wrap(~mutationinformation + , scales = 'free_y' + #, ncol = 3 + , nrow = 1 + ) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=15,face = "italic") + #, plot.title = element_blank() + + , strip.text = element_text(size=ts+2) + , axis.text.x = element_text(size=ts) + , axis.text.y = element_text(size=ts) + , axis.title.y = element_text(size=ts) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title + #, ": sensitivity by lineage" + ), + y = 'Sample Count') #+ + #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) + # geom_blank(aes(y = ypos_label+1.25)) + +# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) + +#======== +# Outplot +#======== +outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" +png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") + , width = 8 + , height = 3, units = "in", res = 300 ) +DSplot +dev.off() diff --git a/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R new file mode 100644 index 0000000..5e192df --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R @@ -0,0 +1,134 @@ +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/embb.R") +source("~/git/LSHTM_analysis/config/gid.R") + +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + + +# Now we need to make a column that fill na in dst with value of dst_mode +df2 = merged_df2 +#table(df2$dst2) + +df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) +df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") +table(df2$sens2) + + +all_snps = unique(df2$mutationinformation) +all_snps_n = length(all_snps); all_snps_n + +all_samples_id = unique(df2$id) # different to nrows +all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows + +sel_lineage = c("L1", "L2", "L3", "L4") +df2_lin = df2[df2$lineage%in%sel_lineage,] +sel_lin_snps = unique(df2_lin$mutationinformation) +sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n + +sel_lin_samples_id = unique(df2_lin$id) +sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n + +# are the snps that are not in L1-L4 unique to L5-L7 +left_snps = all_snps[!all_snps%in%sel_lin_snps] +left_snps_n = length(left_snps); left_snps_n + +if (all_snps_n == sel_lin_snps_n+left_snps_n){ + cat("PASS: left snps extracted for gene", tolower(gene)) +}else{ + stop("Abort: left snps count mismatch") +} + +left_snps +lef_snps_df = df2[df2$mutationinformation%in%left_snps,] +table(lef_snps_df$lineage) + +################################## +# selected lineage plos +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + +#----------------------------------------------- +# step 0: Subset a smaller df +#----------------------------------------------- +plot_df_gene = df2_lin[, cols_to_subset] + +#----------------------------------------------- +# step 1: Select muts for each target +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) +# katg +#sel_mutsP = c("") +# rpob +#sel_mutsP = c("") +#----------------------------------------------- +# step 2: Subset data with just those genes +#----------------------------------------------- +plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] +cat("\nnrow of plot_df:", nrow(plot_df_gene)) +table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) +#----------------------------------------------- +# step 3: Assign to plot_df +#----------------------------------------------- + +plot_df = plot_df_gene + +#----------------------------------------------- +# step 4: Add p-value +#----------------------------------------------- + + +#----------------------------------------------- +# step 5: Plot +#----------------------------------------------- +p_title = gene +ts = 8 +gls = 3 +DSplot = ggplot(plot_df, aes(x = lineage, + fill = sens2)) + + geom_bar(stat = 'count') + + scale_fill_manual(name = "" + # name = leg_title + , values = c("red", "blue") + #, labels = levels(sens2)) + )+ + facet_wrap(~mutationinformation + , scales = 'free_y' + #, ncol = 3 + , nrow = 1 + ) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=15,face = "italic") + #, plot.title = element_blank() + + , strip.text = element_text(size=ts+2) + , axis.text.x = element_text(size=ts) + , axis.text.y = element_text(size=ts) + , axis.title.y = element_text(size=ts) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title + #, ": sensitivity by lineage" + ), + y = 'Sample Count') #+ + #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) + # geom_blank(aes(y = ypos_label+1.25)) + +# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) + +#======== +# Outplot +#======== +outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" +png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") + , width = 8 + , height = 3, units = "in", res = 300 ) +DSplot +dev.off() diff --git a/scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R new file mode 100644 index 0000000..46c9802 --- /dev/null +++ b/scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R @@ -0,0 +1,133 @@ +#============= +# Data: Input +#============== +source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + + +# Now we need to make a column that fill na in dst with value of dst_mode +df2 = merged_df2 +#table(df2$dst2) + +df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) +df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") +table(df2$sens2) + + +all_snps = unique(df2$mutationinformation) +all_snps_n = length(all_snps); all_snps_n + +all_samples_id = unique(df2$id) # different to nrows +all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows + +sel_lineage = c("L1", "L2", "L3", "L4") +df2_lin = df2[df2$lineage%in%sel_lineage,] +sel_lin_snps = unique(df2_lin$mutationinformation) +sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n + +sel_lin_samples_id = unique(df2_lin$id) +sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n + +# are the snps that are not in L1-L4 unique to L5-L7 +left_snps = all_snps[!all_snps%in%sel_lin_snps] +left_snps_n = length(left_snps); left_snps_n + +if (all_snps_n == sel_lin_snps_n+left_snps_n){ + cat("PASS: left snps extracted for gene", tolower(gene)) +}else{ + stop("Abort: left snps count mismatch") +} + +left_snps +lef_snps_df = df2[df2$mutationinformation%in%left_snps,] +table(lef_snps_df$lineage) + +################################## +#----------------------------------------------- +# step 0: Select muts for each target +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +# sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) +# katg +sel_mutsP = c("M257V", "G490S", "H116F", "H97N", "R249C", "W300R", "V320L", "S383A") #R463L +# rpob +#sel_mutsP = c("") + +# selected lineage plos +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + +#----------------------------------------------- +# step 1: Subset a smaller df +#----------------------------------------------- +plot_df_gene = df2_lin[, cols_to_subset] + +#----------------------------------------------- +# step 2: Subset data with just those genes +#----------------------------------------------- +plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] +cat("\nnrow of plot_df:", nrow(plot_df_gene)) +table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) +#----------------------------------------------- +# step 3: Assign to plot_df +#----------------------------------------------- + +plot_df = plot_df_gene + +#----------------------------------------------- +# step 4: Add p-value +#----------------------------------------------- + + +#----------------------------------------------- +# step 5: Plot +#----------------------------------------------- +p_title = gene +ts = 8 +gls = 3 +DSplot = ggplot(plot_df, aes(x = lineage, + fill = sens2)) + + geom_bar(stat = 'count') + + scale_fill_manual(name = "" + # name = leg_title + , values = c("red", "blue") + #, labels = levels(sens2)) + )+ + facet_wrap(~mutationinformation + , scales = 'free_y' + #, ncol = 3 + , nrow = 2 + ) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=15,face = "italic") + #, plot.title = element_blank() + + , strip.text = element_text(size=ts+2) + , axis.text.x = element_text(size=ts) + , axis.text.y = element_text(size=ts) + , axis.title.y = element_text(size=ts) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title + #, ": sensitivity by lineage" + ), + y = 'Sample Count') #+ + #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) + # geom_blank(aes(y = ypos_label+1.25)) + +# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) + +#======== +# Outplot +#======== +outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" +png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") + , width = 8 + , height = 4, units = "in", res = 300 ) +DSplot +dev.off() diff --git a/scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R new file mode 100644 index 0000000..37b6bea --- /dev/null +++ b/scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R @@ -0,0 +1,133 @@ +#============= +# Data: Input +#============== +source("~/git/LSHTM_analysis/config/rpob.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + + +# Now we need to make a column that fill na in dst with value of dst_mode +df2 = merged_df2 +#table(df2$dst2) + +df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) +df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") +table(df2$sens2) + +all_snps = unique(df2$mutationinformation) +all_snps_n = length(all_snps); all_snps_n + +all_samples_id = unique(df2$id) # different to nrows +all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows + +sel_lineage = c("L1", "L2", "L3", "L4") +df2_lin = df2[df2$lineage%in%sel_lineage,] +sel_lin_snps = unique(df2_lin$mutationinformation) +sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n + +sel_lin_samples_id = unique(df2_lin$id) +sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n + +# are the snps that are not in L1-L4 unique to L5-L7 +left_snps = all_snps[!all_snps%in%sel_lin_snps] +left_snps_n = length(left_snps); left_snps_n + +if (all_snps_n == sel_lin_snps_n+left_snps_n){ + cat("PASS: left snps extracted for gene", tolower(gene)) +}else{ + stop("Abort: left snps count mismatch") +} + +left_snps +lef_snps_df = df2[df2$mutationinformation%in%left_snps,] +table(lef_snps_df$lineage) + +################################## +#----------------------------------------------- +# step 0: Select muts for each target +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +# sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) +# katg +#sel_mutsP = c("M257V", "G490S", "H116F", "H97N", "R249C", "W300R", "V320L", "S383A") #R463L +# rpob +sel_mutsP = c("E807Q", "H674Q", "I696L", "P439S", "Q287E", "S188A", + "S195R", "S239R", "S431T", "T756A", "V243I", "V385L", "V403A", "V517K") +#K799Q, I491F +# selected lineage plos +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + +#----------------------------------------------- +# step 1: Subset a smaller df +#----------------------------------------------- +plot_df_gene = df2_lin[, cols_to_subset] + +#----------------------------------------------- +# step 2: Subset data with just those genes +#----------------------------------------------- +plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] +cat("\nnrow of plot_df:", nrow(plot_df_gene)) +table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) +#----------------------------------------------- +# step 3: Assign to plot_df +#----------------------------------------------- + +plot_df = plot_df_gene + +#----------------------------------------------- +# step 4: Add p-value +#----------------------------------------------- + + +#----------------------------------------------- +# step 5: Plot +#----------------------------------------------- +p_title = gene +ts = 8 +gls = 3 +DSplot = ggplot(plot_df, aes(x = lineage, + fill = sens2)) + + geom_bar(stat = 'count') + + scale_fill_manual(name = "" + # name = leg_title + , values = c("red", "blue") + #, labels = levels(sens2)) + )+ + facet_wrap(~mutationinformation + , scales = 'free_y' + #, ncol = 3 + , nrow = 2 + ) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=15,face = "italic") + #, plot.title = element_blank() + + , strip.text = element_text(size=ts+2) + , axis.text.x = element_text(size=ts) + , axis.text.y = element_text(size=ts) + , axis.title.y = element_text(size=ts) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title + #, ": sensitivity by lineage" + ), + y = 'Sample Count') #+ + #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) + # geom_blank(aes(y = ypos_label+1.25)) + +# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) + +#======== +# Outplot +#======== +outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" +png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") + , width = 8 + , height = 4, units = "in", res = 300 ) +DSplot +dev.off()