diff --git a/scripts/plotting/corr_plots.R b/scripts/plotting/corr_plots.R new file mode 100644 index 0000000..c2d3e17 --- /dev/null +++ b/scripts/plotting/corr_plots.R @@ -0,0 +1,260 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + + +source("Header_TT.R") +require(cowplot) +source("combining_dfs_plotting.R") +# should return the following dfs, directories and variables + +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3)) + +#======= +# output +#======= +# can't combine by cowplot because not ggplots +#corr_plot_combined = "corr_combined.svg" +#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) + +# PS +corr_ps = "corr_PS.svg" +plot_corr_ps = paste0(plotdir,"/", corr_ps) + +# LIG +corr_lig = "corr_LIG.svg" +plot_corr_lig = paste0(plotdir,"/", corr_lig) + +#################################################################### +# end of loading libraries and functions # +######################################################################## + +#%%%%%%%%%%%%%%%%%%%%%%%%% +df_ps = merged_df3_comp +df_lig = merged_df3_comp_lig +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#=========================== +# Data for Correlation plots:PS +#=========================== +# adding log cols +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +df_ps$log10_or_kin = log10(df_ps$or_kin) +df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select = c("duet_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "log10_or_kin" + , "neglog_pwald_kin" + + , "af" + + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[, cols_to_select] + +dim(corr_data_ps) + +#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic +#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic + +# assign nice colnames (for display) +my_corr_colnames = c("DUET" + + , "Log(OR)" + , "-Log(P)" + + , "Log(OR adjusted)" + , "-Log(P wald)" + + , "AF" + + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +#----------------- +# generate corr PS plot +#----------------- +start = 1 +end = which(colnames(corr_data_ps) == drug); end # should be the last column +offset = 1 + +my_corr_ps = corr_data_ps[start:(end-offset)] +head(my_corr_ps) + +#my_cols = c("#f8766d", "#00bfc4") +# deep blue :#007d85 +# deep red: #ae301e + +cat("Corr plot PS:", plot_corr_ps) +svg(plot_corr_ps, width = 15, height = 15) + +OutPlot1 = pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] + , pch = 21 + , jitter = T + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 3 + , cex.axis = 2.5 + , cex.labels = 2.1 + , cex.cor = 1 + , smooth = F +) + +print(OutPlot1) +dev.off() + +#=========================== +# Data for Correlation plots: LIG +#=========================== + +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + + +df_lig$log10_or_kin = log10(df_lig$or_kin) +df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + + +# subset data to generate pairwise correlations +cols_to_select = c("affinity_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "log10_or_kin" + , "neglog_pwald_kin" + + , "af" + + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select] + + +dim(corr_data_lig) + +# assign nice colnames (for display) +my_corr_colnames = c("Ligand Affinity" + + , "Log(OR)" + , "-Log(P)" + + , "Log(OR adjusted)" + , "-Log(P wald)" + + , "AF" + + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames +colnames(corr_data_lig) + +#----------------- +# generate corr LIG plot +#----------------- + +start = 1 +end = which(colnames(corr_data_lig) == drug); end # should be the last column +offset = 1 + +my_corr_lig = corr_data_lig[start:(end-offset)] +head(my_corr_lig) + +cat("Corr LIG plot:", plot_corr_lig) +svg(plot_corr_lig, width = 15, height = 15) + +OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] + , pch = 21 + , jitter = T + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 3 + , cex.axis = 2.5 + , cex.labels = 2.1 + , cex.cor = 1 + , smooth = F +) + +print(OutPlot2) +dev.off() +####################################################### diff --git a/scripts/plotting/lineage_dist_combined_PS.R b/scripts/plotting/lineage_dist_combined_PS.R index d62b038..f59cf44 100644 --- a/scripts/plotting/lineage_dist_combined_PS.R +++ b/scripts/plotting/lineage_dist_combined_PS.R @@ -53,6 +53,12 @@ cat("Variables imported:" , "\nother_muts_col:", other_muts_col , "\ndrtype_col:", resistance_col) +#======= +# output +#======= +lineage_dist_combined = "lineage_dist_combined_PS.svg" +plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined) +#======================================================================== ########################### # Data for plots @@ -281,13 +287,10 @@ p2 = ggplot(df_dr, aes(x = duet_scaled print(p2) #dev.off() -#==================== -#combine -#======= -# output -#======= -lineage_dist_combined = "lineage_dist_combined_PS.svg" -plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined) +######################################################################## +#============== +# combine plot +#=============== svg(plot_lineage_dist_combined, width = 12, height = 6) diff --git a/scripts/plotting/or_plots_combined.R b/scripts/plotting/or_plots_combined.R index badfda5..a664f1b 100644 --- a/scripts/plotting/or_plots_combined.R +++ b/scripts/plotting/or_plots_combined.R @@ -1,8 +1,8 @@ #!/usr/bin/env Rscript ######################################################### -# TASK: Basic lineage barplot showing numbers +# TASK: Bubble plot of OR for PS and Lig -# Output: Basic barplot with lineage samples and mut count +# Output: 1 svg #======================================================================= # working dir and loading libraries