diff --git a/scripts/plotting/scratch_plots/lineage_dist_dm_om_combined_PS.R b/scripts/plotting/lineage_dist_dm_om_combined_PS.R similarity index 100% rename from scripts/plotting/scratch_plots/lineage_dist_dm_om_combined_PS.R rename to scripts/plotting/lineage_dist_dm_om_combined_PS.R diff --git a/scripts/plotting/scratch_plots/lineage_dist_dm_om_PS.R b/scripts/plotting/scratch_plots/lineage_dist_dm_om_PS.R new file mode 100644 index 0000000..37e8cbe --- /dev/null +++ b/scripts/plotting/scratch_plots/lineage_dist_dm_om_PS.R @@ -0,0 +1,360 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Lineage dist plots: ggridges + +# Output: 2 SVGs for PS stability + +# 1) all muts +# 2) dr_muts + +########################################################## +# Installing and loading required packages +########################################################## +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("Header_TT.R") +library(ggridges) +source("combining_dfs_plotting.R") +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat("Directories imported:" + , "\n====================" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir) + +cat("Variables imported:" + , "\n=====================" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3 + , "\ndr_muts_col:", dr_muts_col + , "\nother_muts_col:", other_muts_col + , "\ndrtype_col:", resistance_col) + +cat("cols imported:" + , mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2) + +#======= +# output +#======= +lineage_dist_combined_dm_om = "lineage_dist_combined_dm_om_PS.svg" +plot_lineage_dist_combined_dm_om = paste0(plotdir,"/", lineage_dist_combined_dm_om) +#======================================================================== + +########################### +# Data for plots +# you need merged_df2 or merged_df2_comp +# since this is one-many relationship +# i.e the same SNP can belong to multiple lineages +# using the _comp dataset means +# we lose some muts and at this level, we should use +# as much info as available, hence use df with NA +########################### +# REASSIGNMENT +my_df = merged_df2 + +# delete variables not required +rm(my_df_u, merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) + +# quick checks +colnames(my_df) +str(my_df) + +table(my_df$mutation_info) + +#=================== +# Data for plots +#=================== +table(my_df$lineage); str(my_df$lineage) + +# select lineages 1-4 +sel_lineages = c("lineage1" + , "lineage2" + , "lineage3" + , "lineage4") + #, "lineage5" + #, "lineage6" + #, "lineage7") + +# works nicely with facet wrap using labeller, but not otherwise +#my_labels = c('Lineage 1' +# , 'Lineage 2' +# , 'Lineage 3' +# , 'Lineage 4') +# #, 'Lineage 5' +# #, 'Lineage 6' +# #, 'Lineage 7') + +#names(my_labels) = c('lineage1' +# , 'lineage2' +# , 'lineage3' +# , 'lineage4') +# #, 'lineage5' +# #, 'lineage6' +# #, 'lineage7') + +#========================== +# subset selected lineages +#========================== +df_lin = subset(my_df, subset = lineage %in% sel_lineages) +table(df_lin$lineage) + +#{RESULT: Total number of samples for lineage} +sum(table(df_lin$lineage)) + +#{RESULT: No of samples within lineage} +table(df_lin$lineage) + +#{Result: No. of unique mutations the 4 lineages contribute to} +length(unique(df_lin$mutationinformation)) + +u2 = unique(my_df$mutationinformation) +u = unique(df_lin$mutationinformation) + +#{Result:Muts not present within selected lineages} +check = u2[!u2%in%u]; print(check) + +# workaround to make labels appear nicely for in otherwise cases +#================== +# lineage: labels +# from "plyr" +#================== +table(df_lin$lineage) + +df_lin$lineage_labels = mapvalues(df_lin$lineage + , from = c("lineage1","lineage2", "lineage3", "lineage4") + , to = c("Lineage 1", "Lineage 2", "Lineage 3", "Lineage 4")) +table(df_lin$lineage_labels) + +table(df_lin$lineage_labels) == table(df_lin$lineage) + +#======================== +# mutation_info: labels +#======================== +table(df_lin$mutation_info) + +df_lin$mutation_info_labels = ifelse(df_lin$mutation_info == dr_muts_col, "DM", "OM") +table(df_lin$mutation_info_labels) + +table(df_lin$mutation_info) == table(df_lin$mutation_info_labels) + +#======================= +# subset dr muts only +#======================= +my_df_dr = subset(df_lin, mutation_info == dr_muts_col) +table(my_df_dr$mutation_info) +table(my_df_dr$lineage) + +#========================= +# subset other muts only +#========================= +my_df_other = subset(df_lin, mutation_info == other_muts_col) +table(my_df_other$mutation_info) +table(my_df_other$lineage) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#========================== +# Distribution plots +#============================ + +#%%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df <- df_lin +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm(df_lin) + +#****************** +# generate distribution plot of lineages +#****************** +# 2 : ggridges (good!) +my_ats = 15 # axis text size +my_als = 20 # axis label size +n_colours = length(unique(df$duet_scaled)) +my_palette <- colorRampPalette(c(mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2))(n = n_colours+1) + +#======================================= +# Plot 1: lineage dist: geom_density_ridges_gradient (allows aesthetics to vary along ridgeline, no alpha setting!) +# else same as geom_density_ridges) +# x = duet_scaled +# y = lineage_labels +# fill = duet_scaled +# NO FACET (nf) +#======================================= +# output individual svg +#plot_lineage_dist_duet_nf = paste0(plotdir,"/", "lineage_dist_duet_nf.svg") +#plot_lineage_dist_duet_nf +#svg(plot_lineage_dist_duet_nf) + +p1 = ggplot(df, aes(x = duet_scaled + , y = lineage_labels))+ + geom_density_ridges_gradient(aes(fill = ..x..) + #, jittered_points = TRUE + , scale = 3 + , size = 0.3 ) + + coord_cartesian( xlim = c(-1, 1)) + + scale_fill_gradientn(colours = my_palette, name = "DUET") + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + + , axis.text.y = element_text(size = my_ats) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-10) + , legend.title = element_text(size = my_als-3) + , legend.position = c(0.8, 0.8)) + + #, legend.direction = "horizontal")+ + #, legend.position = "top", )+ + labs(x = "DUET") + +p1 + +#p1_copy = p1 + guides(fill = guide_colourbar(label = FALSE)) +#p1_copy= p1_copy + guides(size=guide_legend("Source", override.aes=list(shape=15, size = 10))) + +#p1_copy +#======================================= +# Plot 2: lineage dist: geom_density_ridges, allows alpha to be set +# x = duet_scaled +# y = lineage_labels +# fill = mutation_info +# NO FACET +#======================================= +# output svg +#plot_lineage_dist_duet_dm_om = paste0(plotdir,"/", "lineage_dist_duet_dm_om.svg") +#plot_lineage_dist_duet_dm_om +#svg(plot_lineage_dist_duet_dm_om) + +p2 = ggplot(df, aes(x = duet_scaled + , y = lineage_labels))+ + geom_density_ridges(aes(fill = factor(mutation_info_labels)) + , scale = 3 + , size = 0.3 + , alpha = 0.8) + + coord_cartesian( xlim = c(-1, 1)) + + scale_fill_manual(values = c("#E69F00", "#999999")) + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = my_ats) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-2) + , legend.title = element_text(size = my_als-3) + , legend.position = c(0.8, 0.9)) + + labs(x = "DUET" + , fill = "Mutation class") # legend title + +p2 + +######################################################################## +#============== +# combine plot +#=============== +plot_lineage_dist_combined_dm_om +svg(plot_lineage_dist_combined_dm_om, width = 12, height = 6) + +printFile = cowplot::plot_grid(p1, p2 + , rel_widths = c(0.5/2, 0.5/2) + , label_size = my_als+10) + +print(printFile) +dev.off() + + +######################################################################## +# alternate combination +######################################################################## +#======================= +# Plot 3: lineage dist: geom_density_ridges_gradient (allows aesthetics to vary along ridgeline, no alpha setting!) +# else same as geom_density_ridges) +# x = duet_scaled +# y = duet_outcome +# FACET (f) = lineage +#======================= + +# output individual svg +#plot_lineage_dist_duet = paste0(plotdir,"/", "lineage_dist_duet_f.svg") +#plot_lineage_dist_duet +#svg(plot_lineage_dist_duet) + +p3 = ggplot(df, aes(x = duet_scaled + , y = duet_outcome))+ + + geom_density_ridges_gradient(aes(fill = ..x..) + , scale = 3 + , size = 0.3) + + + facet_wrap( ~lineage_labels + , scales = "free" + #, labeller = labeller(lineage = my_labels) # sorted by lineage_labels + ) + + coord_cartesian( xlim = c(-1, 1)) + + scale_fill_gradientn(colours = my_palette, name = "DUET") + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + + , axis.text.y = element_blank() + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-6) + , legend.title = element_text(size = my_als-3))+ + labs(x = "DUET") + +print(p3) +#dev.off() + +#============== +# combine plot: alt version +#=============== +plot_lineage_dist_duet_fandnf = paste0(plotdir,"/", "lineage_dist_duet_fandnf.svg") +plot_lineage_dist_duet_fandnf +svg(plot_lineage_dist_duet_fandnf, width = 12, height = 6) + +printFile = cowplot::plot_grid(p3, p2 + , rel_widths = c(0.5/2, 0.5/2) + , label_size = my_als+10) + +print(printFile) +dev.off() + +