diff --git a/scripts/plotting/redundant/lineage_dist_combined_PS.R b/scripts/plotting/redundant/lineage_dist_combined_PS.R new file mode 100755 index 0000000..bf1c75b --- /dev/null +++ b/scripts/plotting/redundant/lineage_dist_combined_PS.R @@ -0,0 +1,303 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Lineage dist plots: ggridges + +# Output: 2 SVGs for PS stability + +# 1) all muts +# 2) dr_muts + +########################################################## +# Installing and loading required packages +########################################################## +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("Header_TT.R") +library(ggridges) +source("combining_dfs_plotting.R") +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat("Directories imported:" + , "\n====================" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir) + +cat("Variables imported:" + , "\n=====================" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3 + , "\ndr_muts_col:", dr_muts_col + , "\nother_muts_col:", other_muts_col + , "\ndrtype_col:", resistance_col) + +#======= +# output +#======= +lineage_dist_combined = "lineage_dist_combined_PS.svg" +plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined) +#======================================================================== + +########################### +# Data for plots +# you need merged_df2 or merged_df2_comp +# since this is one-many relationship +# i.e the same SNP can belong to multiple lineages +# using the _comp dataset means +# we lose some muts and at this level, we should use +# as much info as available, hence use df with NA +########################### +# REASSIGNMENT +my_df = merged_df2 + +# delete variables not required +rm(my_df_u, merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) + +# quick checks +colnames(my_df) +str(my_df) + +# Ensure correct data type in columns to plot: need to be factor +is.factor(my_df$lineage) +my_df$lineage = as.factor(my_df$lineage) +is.factor(my_df$lineage) + +table(my_df$mutation_info) + +# subset df with dr muts only +my_df_dr = subset(my_df, mutation_info == "dr_mutations_pyrazinamide") +table(my_df_dr$mutation_info) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#========================== +# Plot 1: ALL Muts +# x = mcsm_values, y = dist +# fill = stability +#============================ + +my_plot_name = 'lineage_dist_PS.svg' + +plot_lineage_duet = paste0(plotdir,"/", my_plot_name) + +#=================== +# Data for plots +#=================== +table(my_df$lineage); str(my_df$lineage) + +# subset only lineages1-4 +sel_lineages = c("lineage1" + , "lineage2" + , "lineage3" + , "lineage4" + #, "lineage5" + #, "lineage6" + #, "lineage7" + ) + +# uncomment as necessary +df_lin = subset(my_df, subset = lineage %in% sel_lineages ) +table(df_lin$lineage) + +# refactor +df_lin$lineage = factor(df_lin$lineage) + +sum(table(df_lin$lineage)) #{RESULT: Total number of samples for lineage} + +table(df_lin$lineage)#{RESULT: No of samples within lineage} + +length(unique(df_lin$mutationinformation))#{Result: No. of unique mutations the 4 lineages contribute to} + +length(df_lin$mutationinformation) + +u2 = unique(my_df$mutationinformation) +u = unique(df_lin$mutationinformation) +check = u2[!u2%in%u]; print(check) #{Muts not present within selected lineages} + +#%%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df <- df_lin +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm(df_lin) + +#****************** +# generate distribution plot of lineages +#****************** +# 2 : ggridges (good!) +my_ats = 15 # axis text size +my_als = 20 # axis label size + +my_labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4' + #, 'Lineage 5', 'Lineage 6', 'Lineage 7' + ) +names(my_labels) = c('lineage1', 'lineage2', 'lineage3', 'lineage4' + # , 'lineage5', 'lineage6', 'lineage7' + ) +# check plot name +plot_lineage_duet + +# output svg +#svg(plot_lineage_duet) +p1 = ggplot(df, aes(x = duet_scaled + , y = duet_outcome))+ + + #printFile=geom_density_ridges_gradient( + geom_density_ridges_gradient(aes(fill = ..x..) + #, jittered_points = TRUE + , scale = 3 + , size = 0.3 ) + + facet_wrap( ~lineage + , scales = "free" + #, switch = 'x' + , labeller = labeller(lineage = my_labels) ) + + coord_cartesian( xlim = c(-1, 1)) + + scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4") + , name = "DUET" ) + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + + , axis.text.y = element_blank() + , axis.title.x = element_blank() + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-5) + , legend.title = element_text(size = my_als) +) + +print(p1) +#dev.off() + +####################################################################### +# lineage distribution plot for dr_muts +####################################################################### + +#========================== +# Plot 2: dr muts ONLY +# x = mcsm_values, y = dist +# fill = stability +#============================ + +my_plot_name_dr = 'lineage_dist_dr_muts_PS.svg' + +plot_lineage_dr_duet = paste0(plotdir,"/", my_plot_name_dr) + +#=================== +# Data for plots +#=================== +table(my_df_dr$lineage); str(my_df_dr$lineage) + +# uncomment as necessary +df_lin_dr = subset(my_df_dr, subset = lineage %in% sel_lineages) +table(df_lin_dr$lineage) + +# refactor +df_lin_dr$lineage = factor(df_lin_dr$lineage) + +sum(table(df_lin_dr$lineage)) #{RESULT: Total number of samples for lineage} + +table(df_lin_dr$lineage)#{RESULT: No of samples within lineage} + +length(unique(df_lin_dr$mutationinformation))#{Result: No. of unique mutations the 4 lineages contribute to} + +length(df_lin_dr$mutationinformation) + +u2 = unique(my_df_dr$mutationinformation) +u = unique(df_lin_dr$mutationinformation) +check = u2[!u2%in%u]; print(check) #{Muts not present within selected lineages} + +#%%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df_dr <- df_lin_dr +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm(df_lin_dr) + +#****************** +# generate distribution plot of lineages +#****************** +# 2 : ggridges (good!) +my_ats = 15 # axis text size +my_als = 20 # axis label size + + +# check plot name +plot_lineage_dr_duet + +# output svg +#svg(plot_lineage_dr_duet) +p2 = ggplot(df_dr, aes(x = duet_scaled + , y = duet_outcome))+ + + geom_density_ridges_gradient(aes(fill = ..x..) + #, jittered_points = TRUE + , scale = 3 + , size = 0.3) + + #geom_point(aes(size = or_mychisq))+ + facet_wrap( ~lineage + , scales = "free" + #, switch = 'x' + , labeller = labeller(lineage = my_labels) ) + + coord_cartesian( xlim = c(-1, 1) + #, ylim = c(0, 6) + #, clip = "off" + ) + + scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4") + , name = "DUET" ) + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_blank() + , axis.title.x = element_blank() + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = 10) + , legend.title = element_text(size = my_als) + #, legend.position = "none" + ) + +print(p2) +#dev.off() +######################################################################## +#============== +# combine plot +#=============== + +svg(plot_lineage_dist_combined, width = 12, height = 6) + +printFile = cowplot::plot_grid(p1, p2 + , label_size = my_als+10) + +print(printFile) +dev.off() diff --git a/scripts/plotting/redundant/lineage_dist_dm_om_combined_PS.R b/scripts/plotting/redundant/lineage_dist_dm_om_combined_PS.R new file mode 100755 index 0000000..3875382 --- /dev/null +++ b/scripts/plotting/redundant/lineage_dist_dm_om_combined_PS.R @@ -0,0 +1,309 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Lineage dist plots: ggridges + +# Output: 2 SVGs for PS stability + +# 1) all muts +# 2) dr_muts + +########################################################## +# Installing and loading required packages +########################################################## +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("Header_TT.R") + +source("get_plotting_dfs.R") + +cat("cols imported:" + , mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2) + +#======= +# output +#======= +lineage_dist_combined_dm_om = "lineage_dist_combined_dm_om_PS.svg" +plot_lineage_dist_combined_dm_om = paste0(plotdir,"/", lineage_dist_combined_dm_om) + +lineage_dist_combined_dm_om_L = "lineage_dist_combined_dm_om_PS_labelled.svg" +plot_lineage_dist_combined_dm_om_L = paste0(plotdir,"/", lineage_dist_combined_dm_om_L) + +#======================================================================== + +########################### +# Data for plots +# you need merged_df2 or merged_df2_comp +# since this is one-many relationship +# i.e the same SNP can belong to multiple lineages +# using the _comp dataset means +# we lose some muts and at this level, we should use +# as much info as available, hence use df with NA +########################### + +#=================== +# Data for plots +#=================== +lin_dist_plot = merged_df2[merged_df2$lineage%in%c("lineage1", "lineage2", "lineage3", "lineage4"),] +table(lin_dist_plot$lineage) + +#{RESULT: Total number of samples for lineage} +sum(table(lin_dist_plot$lineage)) + +#{RESULT: No of samples within lineage} +table(lin_dist_plot$lineage) + +#{Result: No. of unique mutations the 4 lineages contribute to} +length(unique(lin_dist_plot$mutationinformation)) + +u2 = unique(lin_dist_plot$mutationinformation) +u = unique(lin_dist_plot$mutationinformation) + +#{Result:Muts not present within selected lineages} +check = u2[!u2%in%u]; print(check) + +# workaround to make labels appear nicely for in otherwise cases +#================== +# lineage: labels +# from "plyr" +#================== +#{Result:No of samples in selected lineages} +table(lin_dist_plot$lineage) + +lin_dist_plot$lineage_labels = mapvalues(lin_dist_plot$lineage + , from = c("lineage1","lineage2", "lineage3", "lineage4") + , to = c("Lineage 1", "Lineage 2", "Lineage 3", "Lineage 4")) +table(lin_dist_plot$lineage_labels) + +table(lin_dist_plot$lineage_labels) == table(lin_dist_plot$lineage) + +#======================== +# mutation_info: labels +#======================== +#{Result:No of DM and OM muts in selected lineages} +table(lin_dist_plot$mutation_info) + +lin_dist_plot$mutation_info_labels = ifelse(lin_dist_plot$mutation_info == dr_muts_col + , "DM", "OM") +table(lin_dist_plot$mutation_info_labels) + +table(lin_dist_plot$mutation_info) == table(lin_dist_plot$mutation_info_labels) + +#======================== +# duet_outcome: labels +#======================== +#{Result: No. of D and S mutations in selected lineages} +table(lin_dist_plot$duet_outcome) + +lin_dist_plot$duet_outcome_labels = ifelse(lin_dist_plot$duet_outcome == "Destabilising" + , "D", "S") +table(lin_dist_plot$duet_outcome_labels) + +table(lin_dist_plot$duet_outcome) == table(lin_dist_plot$duet_outcome_labels) + + +#======================= +# subset dr muts only +#======================= +#my_df_dr = subset(df_lin, mutation_info == dr_muts_col) +#table(my_df_dr$mutation_info) +#table(my_df_dr$lineage) + +#========================= +# subset other muts only +#========================= +#my_df_other = subset(df_lin, mutation_info == other_muts_col) +#table(my_df_other$mutation_info) +#table(my_df_other$lineage) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## +#****************** +# generate distribution plot of lineages +#****************** +# 2 : ggridges (good!) +my_ats = 15 # axis text size +my_als = 20 # axis label size +n_colours = length(unique(lin_dist_plot$duet_scaled)) + +my_palette <- colorRampPalette(c(mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2))(n = n_colours+1) + +#======================================= +# Plot 1: lineage dist: geom_density_ridges_gradient (allows aesthetics to vary along ridgeline, no alpha setting!) +# else same as geom_density_ridges) +# x = duet_scaled +# y = duet_outcome +# fill = duet_scaled +# Facet: Lineage +#======================================= +# output individual svg +#plot_lineage_dist_duet_f paste0(plotdir,"/", "lineage_dist_duet_f.svg") +#plot_lineage_dist_duet_f +#svg(plot_lineage_dist_duet_f) + +p1 = ggplot(lin_dist_plot, aes(x = duet_scaled + #, y = duet_outcome + , y = mutation_info_labels + ))+ + geom_density_ridges_gradient(aes(fill = ..x..) + #, jittered_points = TRUE + , scale = 3 + , size = 0.3 ) + + facet_wrap( ~lineage_labels + #~mutation_info_labels + # ~mutation_info_labels + # , scales = "free" + # , labeller = labeller(lineage = my_labels) + ) + + #coord_cartesian( xlim = c(-1, 1)) + + scale_x_continuous(expand = c(0.01, 0)) + + + scale_fill_gradientn(colours = my_palette + , name = "DUET" + #, breaks = c(-1, 0, 1) + #, labels = c(-1,0,1) + #, limits = c(-1,1) + ) + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + #, axis.text.y = element_blank() + , axis.text.y = element_text(size = my_ats) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-10) + #, legend.title = element_text(size = my_als-6) + , legend.title = element_blank() + , legend.position = c(-0.08, 0.41) + , legend.direction = "horizontal" + , legend.position = "top" +)+ + labs(x = "DUET") + +p1 + + +#p1_with_legend = p1 + guides(fill = guide_colourbar(label = FALSE)) + +#======================================= +# Plot 2: lineage dist: geom_density_ridges, allows alpha to be set +# x = duet_scaled +# y = lineage_labels +# fill = mutation_info +# NO FACET +#======================================= +# output svg +#plot_lineage_dist_duet_dm_om = paste0(plotdir,"/", "lineage_dist_duet_dm_om.svg") +#plot_lineage_dist_duet_dm_om +#svg(plot_lineage_dist_duet_dm_om) + +p2 = ggplot(lin_dist_plot, aes(x = duet_scaled + , y = lineage_labels))+ + geom_density_ridges(aes(fill = factor(mutation_info_labels)) + , scale = 3 + , size = 0.3 + , alpha = 0.8) + + scale_x_continuous(expand = c(0.01, 0)) + + #coord_cartesian( xlim = c(-1, 1)) + + scale_fill_manual(values = c("#E69F00", "#999999")) + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = my_ats) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-4) + , legend.title = element_text(size = my_als-4) + , legend.position = c(0.8, 0.9)) + + labs(x = "DUET" + , fill = "Mutation class") # legend title + +p2 + +#======================================= +# Plot 3: lineage dist: geom_density_ridges_gradient (allows aesthetics to vary along ridgeline, no alpha setting!) +# else same as geom_density_ridges) +# x = duet_scaled +# y = lineage_labels +# fill = duet_scaled +# NO FACET (nf) +#======================================= +# output individual svg +#plot_lineage_dist_duet_nf = paste0(plotdir,"/", "lineage_dist_duet_nf.svg") +#plot_lineage_dist_duet_nf +#svg(plot_lineage_dist_duet_nf) + +p3 = ggplot(lin_dist_plot, aes(x = duet_scaled + , y = lineage_labels))+ + # geom_density_ridges_gradient(aes(fill = ..x..) + # #, jittered_points = TRUE + # , scale = 3 + # , size = 0.3 ) + + geom_density_ridges()+ + #facet_wrap (~mutation_info_labels) + + #coord_cartesian( xlim = c(-1, 1)) + + scale_x_continuous(expand = c(0.01, 0)) + + + #scale_fill_gradientn(colours = my_palette, name = "DUET") + + theme(axis.text.x = element_text(size = my_ats + , angle = 90 + , hjust = 1 + , vjust = 0.4) + + , axis.text.y = element_text(size = my_ats) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_blank() + , axis.ticks.y = element_blank() + , plot.title = element_blank() + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = my_als-10) + , legend.title = element_text(size = my_als-3) + , legend.position = c(0.8, 0.8)) + + #, legend.direction = "horizontal")+ + #, legend.position = "top")+ + labs(x = "DUET") + +p3 + +######################################################################## +#============== +# combine plots +#=============== +# 1) without labels +plot_lineage_dist_combined_dm_om +svg(plot_lineage_dist_combined_dm_om, width = 12, height = 6) + +OutPlot1 = cowplot::plot_grid(p1, p2 + , rel_widths = c(0.5/2, 0.5/2)) + +print(OutPlot1) +dev.off() + + +# 2) with labels +plot_lineage_dist_combined_dm_om_L +svg(plot_lineage_dist_combined_dm_om_L, width = 12, height = 6) + +OutPlot2 = cowplot::plot_grid(p1, p2 + #, labels = c("(a)", "(b)") + , labels = "AUTO" + #, label_x = -0.045, label_y = 0.92 + #, hjust = -0.7, vjust = -0.5 + #, align = "h" + , rel_widths = c(0.5/2, 0.5/2) + , label_size = my_als) + +print(OutPlot2) +dev.off() + +##############################################################################