#!/usr/bin/env Rscript
# Provenance: recovered from a git format-patch whose newlines had been
# collapsed.
#   commit : ba02107e23f997d6580c5f0b8c0bbc18d9c7b397
#   author : Tanushree Tunstall
#   date   : Wed, 16 Sep 2020 11:51:17 +0100
#   subject: added dir scratch_plots/ to practice extra plots
#   file   : scripts/plotting/scratch_plots/lineage_dist_combined_PS_style.R
#########################################################
# TASK: Lineage dist plots: ggridges

# Output: 1 combined SVG for PS stability (DUET), two panels:

# 1) duet_scaled ridges by duet_outcome, faceted by lineage
# 2) duet_scaled ridges by mutation_info (DM/OM), faceted by lineage

##########################################################
# Installing and loading required packages
##########################################################
# NOTE(review): the hard-coded working directory is kept because the relative
# source() calls below depend on it; the script only runs from this checkout.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

source("Header_TT.R")
library(ggridges)
source("combining_dfs_plotting.R")
# Objects expected to be available after the source() calls above
# (listed by the original author):
# PS combined:
# 1) merged_df2
# 2) merged_df2_comp
# 3) merged_df3
# 4) merged_df3_comp

# LIG combined:
# 5) merged_df2_lig
# 6) merged_df2_comp_lig
# 7) merged_df3_lig
# 8) merged_df3_comp_lig

# 9) my_df_u
# 10) my_df_u_lig

# Trailing "\n" added so consecutive banners do not run into each other.
cat("Directories imported:"
    , "\n===================="
    , "\ndatadir:", datadir
    , "\nindir:", indir
    , "\noutdir:", outdir
    , "\nplotdir:", plotdir
    , "\n")

cat("Variables imported:"
    , "\n====================="
    , "\ndrug:", drug
    , "\ngene:", gene
    , "\ngene_match:", gene_match
    , "\nAngstrom symbol:", angstroms_symbol
    , "\nNo. of duplicated muts:", dup_muts_nu
    , "\nNA count for ORs:", na_count
    , "\nNA count in df2:", na_count_df2
    , "\nNA count in df3:", na_count_df3
    , "\ndr_muts_col:", dr_muts_col
    , "\nother_muts_col:", other_muts_col
    , "\ndrtype_col:", resistance_col
    , "\n")

#=======
# output
#=======
#lineage_dist_dm_om = "lineage_dist_dm_om_PS.svg"
#plot_lineage_dist_dm_om = paste0(plotdir,"/", lineage_dist_dm_om)

# The combined plot is written to plot_lineage_dist_combined, which this script
# never assigned. Define it here unless a sourced script already did, so that
# svg() below does not fail with "object not found".
# NOTE(review): the default file name is derived from this script's name;
# adjust if a different output name is wanted.
if (!exists("plot_lineage_dist_combined")) {
  lineage_dist_combined = "lineage_dist_combined_PS_style.svg"
  plot_lineage_dist_combined = paste0(plotdir, "/", lineage_dist_combined)
}
#========================================================================

###########################
# Data for plots
# you need merged_df2 or merged_df2_comp
# since this is one-many relationship
# i.e the same SNP can belong to multiple lineages
# using the _comp dataset means
# we lose some muts and at this level, we should use
# as much info as available, hence use df with NA
###########################
# REASSIGNMENT
my_df = merged_df2

# delete variables not required
rm(my_df_u, merged_df2, merged_df2_comp, merged_df3, merged_df3_comp)

# quick checks
colnames(my_df)
str(my_df)

# Ensure correct data type in columns to plot: need to be factor
is.factor(my_df$lineage)
my_df$lineage = as.factor(my_df$lineage)
is.factor(my_df$lineage)

table(my_df$mutation_info)

#===================
# Data for plots
#===================
table(my_df$lineage); str(my_df$lineage)

# select lineages 1-4
sel_lineages = c("lineage1"
                 , "lineage2"
                 , "lineage3"
                 , "lineage4"
                 #, "lineage5"
                 #, "lineage6"
                 #, "lineage7"
)

# Facet strip labels, keyed by the lineage values selected above.
# Keep the order in step with sel_lineages.
my_labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4'
              #, 'Lineage 5', 'Lineage 6', 'Lineage 7'
              )
names(my_labels) = sel_lineages

# subset selected lineages
df_lin = subset(my_df, subset = lineage %in% sel_lineages)
table(df_lin$lineage)

# refactor: drop the factor levels of lineages that were filtered out
df_lin$lineage = factor(df_lin$lineage)

#{RESULT: Total number of samples for lineage}
sum(table(df_lin$lineage))

#{RESULT: No of samples within lineage}
table(df_lin$lineage)

#{Result: No. of unique mutations the 4 lineages contribute to}
length(unique(df_lin$mutationinformation))

u2 = unique(my_df$mutationinformation)
u = unique(df_lin$mutationinformation)

#{Result:Muts not present within selected lineages}
check = u2[!u2 %in% u]; print(check)

#=======================
# subset dr muts only
#=======================
my_df_dr = subset(df_lin, mutation_info == dr_muts_col)
table(my_df_dr$mutation_info)
table(my_df_dr$lineage)

#=========================
# subset other muts only
#=========================
my_df_other = subset(df_lin, mutation_info == other_muts_col)
table(my_df_other$mutation_info)
table(my_df_other$lineage)

########################################################################
# end of data extraction and cleaning for plots                        #
########################################################################

#==========================
# Plots: ALL muts in the selected lineages
# x = duet_scaled, y = duet_outcome (plot 1) / mutation_info (plot 2)
# fill = position along x (duet_scaled)
#============================

#my_plot_name_dm_om = 'lineage_dist_DM_OM_PS.svg'
#plot_lineage_dm_om_duet = paste0(plotdir,"/", my_plot_name_dm_om)

#%%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
df <- df_lin
#%%%%%%%%%%%%%%%%%%%%%%%%%

rm(df_lin)

#******************
# generate distribution plot of lineages
#******************
# 2 : ggridges (good!)
my_ats = 15 # axis text size
my_als = 20 # axis label size

# check plot name
# (the original evaluated the undefined object plot_lineage_duet here)
cat("\nOutput plot:", plot_lineage_dist_combined, "\n")

# One palette step per distinct duet_scaled value (plus one), interpolated
# across the five mcsm_* anchor colours.
# NOTE(review): mcsm_red2 ... mcsm_blue2 are not defined in this script;
# presumably they come from Header_TT.R -- confirm.
n_colours = length(unique(df$duet_scaled))

#my_palette <- colorRampPalette(c(mcsm_red, "#f8766d", "white","#00bfc4", mcsm_blue))(n = 400)
my_palette <- colorRampPalette(c(mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2))(n = n_colours+1)

# Theme settings shared by both panels (previously duplicated verbatim).
lineage_ridge_theme = theme(axis.text.x = element_text(size = my_ats
                                                       , angle = 90
                                                       , hjust = 1
                                                       , vjust = 0.4)
                            , axis.title.x = element_blank()
                            , axis.title.y = element_blank()
                            , axis.ticks.y = element_blank()
                            , plot.title = element_blank()
                            , strip.text = element_text(size = my_als)
                            , legend.text = element_text(size = my_als-5)
                            , legend.title = element_text(size = my_als)
                            )

#=======================
# Plot 1: lineage dist
#=======================
# output svg
#svg(plot_lineage_duet)
p1 = ggplot(df, aes(x = duet_scaled
                    , y = duet_outcome)) +
  geom_density_ridges_gradient(aes(fill = ..x..)
                               #, jittered_points = TRUE
                               , scale = 3
                               , size = 0.3 ) +
  facet_wrap( ~lineage
              , scales = "free"
              #, switch = 'x'
              , labeller = labeller(lineage = my_labels)) +
  coord_cartesian( xlim = c(-1, 1)) +
  #scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
  #                     , name = "DUET" ) +
  #scale_fill_gradientn(colours = c(mcsm_red, "#f8766d", "white","#00bfc4", mcsm_blue)
  #                     , name = "DUET" ) +
  scale_fill_gradientn(colours = my_palette, name = "DUET") +
  lineage_ridge_theme +
  # this panel additionally hides the y tick labels
  theme(axis.text.y = element_blank())

print(p1)
#dev.off()

#=======================================
# Plot 2: lineage dist by mutation_info
#=======================================
# Short display labels for the two mutation classes, keyed by the raw values
# held in dr_muts_col / other_muts_col.
mutation_info_labels = c("DM", "OM")
names(mutation_info_labels) = c(dr_muts_col, other_muts_col)

p3 = ggplot(df, aes(x = duet_scaled
                    , y = mutation_info)) +
  geom_density_ridges_gradient(aes(fill = ..x..)
                               #, jittered_points = TRUE
                               , scale = 3
                               , size = 0.3 ) +
  facet_wrap(~lineage
             # , scales = "free"
             # #, switch = 'x'
             , labeller = labeller(lineage = my_labels)) +
  # mutation_info is the y aesthetic, not a facet variable, so the labeller
  # entry the original passed for it had no effect. Apply the DM/OM labels on
  # the y axis instead (the named vector is matched to the axis breaks).
  scale_y_discrete(labels = mutation_info_labels) +
  coord_cartesian( xlim = c(-1, 1)) +
  #scale_fill_gradientn(colours = c("#ae301e", "white", "#007d85")
  #                     , name = "DUET" ) +
  #scale_fill_gradientn(colours = c("mcsm_red", "white", "mcsm_blue")
  #                     , name = "DUET" ) +
  scale_fill_gradientn(colours = my_palette, name = "DUET") +
  lineage_ridge_theme

print(p3)

#==============================


########################################################################
#==============
# combine plot
#===============

svg(plot_lineage_dist_combined, width = 12, height = 6)

# No `labels` are supplied, so label_size currently has no visible effect;
# kept as in the original.
printFile = cowplot::plot_grid(p1, p3
                               , label_size = my_als+10)

print(printFile)
dev.off()