diff --git a/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/embb/embb_lineage_diff_sensitivities.R similarity index 95% rename from scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R rename to scripts/plotting/plotting_thesis/embb/embb_lineage_diff_sensitivities.R index 5e192df..732d7ae 100644 --- a/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R +++ b/scripts/plotting/plotting_thesis/embb/embb_lineage_diff_sensitivities.R @@ -45,20 +45,10 @@ lef_snps_df = df2[df2$mutationinformation%in%left_snps,] table(lef_snps_df$lineage) ################################## -# selected lineage plos -cols_to_subset = c("mutationinformation" - , "lineage" - , "dst2" - , "sens2") - - +# selected lineage plots +################################## #----------------------------------------------- -# step 0: Subset a smaller df -#----------------------------------------------- -plot_df_gene = df2_lin[, cols_to_subset] - -#----------------------------------------------- -# step 1: Select muts for each target +# step 0: Select muts for each target #----------------------------------------------- # embb #sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") @@ -69,6 +59,18 @@ sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) # rpob #sel_mutsP = c("") #----------------------------------------------- +# step 1: Subset a smaller df +#----------------------------------------------- +# selected lineage plots +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + + +plot_df_gene = df2_lin[, cols_to_subset] +#----------------------------------------------- # step 2: Subset data with just those genes #----------------------------------------------- plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] @@ -82,9 +84,9 @@ plot_df = plot_df_gene #----------------------------------------------- # step 4: Add p-value +# NOT NEEDED, get it 
from lineage_diff_sensitivities.R if needed #----------------------------------------------- - #----------------------------------------------- # step 5: Plot #----------------------------------------------- diff --git a/scripts/plotting/plotting_thesis/gid/gid_lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/gid/gid_lineage_diff_sensitivities.R new file mode 100644 index 0000000..0c271bf --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/gid_lineage_diff_sensitivities.R @@ -0,0 +1,136 @@ +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/embb.R") +source("~/git/LSHTM_analysis/config/gid.R") + +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + + +# Now we need to make a column that fills NA in dst with the value of dst_mode +df2 = merged_df2 +#table(df2$dst2) + +df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) +df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") +table(df2$sens2) + + +all_snps = unique(df2$mutationinformation) +all_snps_n = length(all_snps); all_snps_n + +all_samples_id = unique(df2$id) # different to nrows +all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows + +sel_lineage = c("L1", "L2", "L3", "L4") +df2_lin = df2[df2$lineage%in%sel_lineage,] +sel_lin_snps = unique(df2_lin$mutationinformation) +sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n + +sel_lin_samples_id = unique(df2_lin$id) +sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n + +# are the snps that are not in L1-L4 unique to L5-L7? +left_snps = all_snps[!all_snps%in%sel_lin_snps] +left_snps_n = length(left_snps); left_snps_n + +if (all_snps_n == sel_lin_snps_n+left_snps_n){ + cat("PASS: left snps extracted for gene", tolower(gene)) +}else{ + stop("Abort: left snps count mismatch") +} + +left_snps +lef_snps_df = df2[df2$mutationinformation%in%left_snps,] +table(lef_snps_df$lineage) + +################################## +# selected lineage plots 
+################################## +#----------------------------------------------- +# step 0: Select muts for each target +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D) +# katg +#sel_mutsP = c("") +# rpob +#sel_mutsP = c("") +#----------------------------------------------- +# step 1: Subset a smaller df +#----------------------------------------------- +cols_to_subset = c("mutationinformation" + , "lineage" + , "dst2" + , "sens2") + + + +plot_df_gene = df2_lin[, cols_to_subset] +#----------------------------------------------- +# step 2: Subset data to just the selected mutations (sel_mutsP) +#----------------------------------------------- +plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] +cat("\nnrow of plot_df:", nrow(plot_df_gene)) +table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) +#----------------------------------------------- +# step 3: Assign to plot_df +#----------------------------------------------- + +plot_df = plot_df_gene + +#----------------------------------------------- +# step 4: Add p-value +# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed +#----------------------------------------------- + + +#----------------------------------------------- +# step 5: Plot +#----------------------------------------------- +p_title = gene +ts = 8 +gls = 3 +DSplot = ggplot(plot_df, aes(x = lineage, + fill = sens2)) + + geom_bar(stat = 'count') + + scale_fill_manual(name = "" + # name = leg_title + , values = c("red", "blue") + #, labels = levels(sens2)) + )+ + facet_wrap(~mutationinformation + , scales = 'free_y' + #, ncol = 3 + , nrow = 1 + ) + + theme(legend.position = "top" + , plot.title = element_text(hjust = 0.5, size=15,face = "italic") + #, plot.title = element_blank() + + , strip.text = element_text(size=ts+2) + , axis.text.x = 
element_text(size=ts) + , axis.text.y = element_text(size=ts) + , axis.title.y = element_text(size=ts) + , legend.title = element_blank() + , axis.title.x = element_blank() + )+ + labs(title = paste0(p_title + #, ": sensitivity by lineage" + ), + y = 'Sample Count') #+ + #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) + # geom_blank(aes(y = ypos_label+1.25)) + +# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) + +#======== +# Outplot +#======== +outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" +png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") + , width = 8 + , height = 3, units = "in", res = 300 ) +DSplot +dev.off() diff --git a/scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/katg/katg_lineage_diff_sensitivities.R similarity index 97% rename from scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R rename to scripts/plotting/plotting_thesis/katg/katg_lineage_diff_sensitivities.R index 46c9802..400f599 100644 --- a/scripts/plotting/plotting_thesis/katg/lineage_diff_sensitivities.R +++ b/scripts/plotting/plotting_thesis/katg/katg_lineage_diff_sensitivities.R @@ -42,6 +42,8 @@ left_snps lef_snps_df = df2[df2$mutationinformation%in%left_snps,] table(lef_snps_df$lineage) +################################## +# selected lineage plots ################################## #----------------------------------------------- # step 0: Select muts for each target @@ -81,9 +83,9 @@ plot_df = plot_df_gene #----------------------------------------------- # step 4: Add p-value +# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed #----------------------------------------------- - #----------------------------------------------- # step 5: Plot #----------------------------------------------- diff --git a/scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/lineage_diff_sensitivities.R similarity 
index 92% rename from scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R rename to scripts/plotting/plotting_thesis/lineage_diff_sensitivities.R index d7e1833..f7e2074 100644 --- a/scripts/plotting/plotting_thesis/embb/lineage_diff_sensitivities.R +++ b/scripts/plotting/plotting_thesis/lineage_diff_sensitivities.R @@ -1,15 +1,12 @@ #============= # Data: Input #============== -#source("~/git/LSHTM_analysis/config/embb.R") -source("~/git/LSHTM_analysis/config/gid.R") - +source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") # Now we need to make a column that fill na in dst with value of dst_mode df2 = merged_df2 -#table(df2$dst2) df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") @@ -45,39 +42,40 @@ lef_snps_df = df2[df2$mutationinformation%in%left_snps,] table(lef_snps_df$lineage) ################################## +# selected lineage plots +################################## +#----------------------------------------------- +# step 0: Select muts for each target (NOTE(review): every sel_mutsP assignment below is commented out, yet step 2 reads sel_mutsP; confirm it is defined by the sourced config or uncomment one) +#----------------------------------------------- +# embb +#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") +# gid +#sel_mutsP = c("") +# katg +#sel_mutsP = c("") +# rpob +#sel_mutsP = c("") # selected lineage plos + +#----------------------------------------------- +# step 1: Subset a smaller df +#----------------------------------------------- cols_to_subset = c("mutationinformation" , "lineage" , "dst2" , "sens2") - -#----------------------------------------------- -# step 0: Subset a smaller df -#----------------------------------------------- plot_df_gene = df2_lin[, cols_to_subset] -#----------------------------------------------- -# step 1: Select muts for each target 
-#----------------------------------------------- -# embb -#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") -# gid -sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") 
#G30D) -# katg -#sel_mutsP = c("") -# rpob -#sel_mutsP = c("") #----------------------------------------------- # step 2: Subset data with just those genes #----------------------------------------------- plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] cat("\nnrow of plot_df:", nrow(plot_df_gene)) -table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation) + #----------------------------------------------- # step 3: Assign to plot_df #----------------------------------------------- - plot_df = plot_df_gene #----------------------------------------------- @@ -160,7 +158,7 @@ DSplot = ggplot(plot_df, aes(x = lineage, #======== outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") - , width = 8 - , height = 3, units = "in", res = 300 ) + , width = 4 + , height = 4, units = "in", res = 300 ) DSplot dev.off() diff --git a/scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R b/scripts/plotting/plotting_thesis/rpob/rpob_lineage_diff_sensitivities.R similarity index 97% rename from scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R rename to scripts/plotting/plotting_thesis/rpob/rpob_lineage_diff_sensitivities.R index 37b6bea..fe11e01 100644 --- a/scripts/plotting/plotting_thesis/rpob/lineage_diff_sensitivities.R +++ b/scripts/plotting/plotting_thesis/rpob/rpob_lineage_diff_sensitivities.R @@ -41,6 +41,8 @@ left_snps lef_snps_df = df2[df2$mutationinformation%in%left_snps,] table(lef_snps_df$lineage) +################################## +# selected lineage plots ################################## #----------------------------------------------- # step 0: Select muts for each target @@ -81,9 +83,9 @@ plot_df = plot_df_gene #----------------------------------------------- # step 4: Add p-value +# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed #----------------------------------------------- - 
#----------------------------------------------- # step 5: Plot #-----------------------------------------------