Modified bp with option for adding stats and boxplots. Moved old one to redundant

This commit is contained in:
Tanushree Tunstall 2021-09-02 12:50:24 +01:00
parent c8e21b928c
commit 2c65bb25d8
8 changed files with 443 additions and 102 deletions

View file

@ -3,12 +3,6 @@
#########################################################
#lib_loc = "/usr/local/lib/R/site-library")
#if (!require("gplots")) {
# install.packages("gplots", dependencies = TRUE)
# library(gplots)
#}
require(extrafont)
require("getopt", quietly = TRUE) # cmd parse arguments
if (!require("tidyverse")) {
@ -16,9 +10,23 @@ if (!require("tidyverse")) {
library(tidyverse)
}
if (!require("ggplot2")) {
install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
# if (!require("ggplot2")) {
# install.packages("ggplot2", dependencies = TRUE)
# library(ggplot2)
# }
# if (!require ("dplyr")){
# install.packages("dplyr")
# library(dplyr)
# }
# Install
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/ggcorrplot")
if (!require ("ggbeeswarm")){
install.packages("ggbeeswarm")
library(ggbeeswarm)
}
if (!require("plotly")) {
@ -101,11 +109,6 @@ if (!require ("psych")){
library(psych)
}
if (!require ("dplyr")){
install.packages("dplyr")
library(dplyr)
}
if (!require ("compare")){
install.packages("compare")
library(compare)
@ -116,31 +119,25 @@ if (!require ("arsenal")){
library(arsenal)
}
if(!require(ggseqlogo)){
install.packages("ggseqlogo")
library(ggseqlogo)
}
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("Logolas")
library("Logolas")
#install.packages("ggseqlogo")
library(ggseqlogo)
####TIDYVERSE
# Install
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/ggcorrplot")
library(ggcorrplot)
###for PDB files
#install.packages("bio3d")
# for PDB files
if(!require(bio3d)){
install.packages("bio3d")
library(bio3d)
}
#install.packages("protr")
library(protr)
if(!require(protr)){
install.packages("protr")
library(protr)
}
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("Logolas")
library("Logolas")

View file

@ -86,8 +86,10 @@ all_plot_dfs = combining_dfs_plotting(my_df_u
, lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff)
merged_df2 = all_plot_dfs[[1]]
merged_df3 = all_plot_dfs[[2]]
merged_df2 = all_plot_dfs[[1]]
merged_df3 = all_plot_dfs[[2]]
merged_df2_comp = all_plot_dfs[[3]]
merged_df3_comp = all_plot_dfs[[4]]
#======================================================================
# read other files
infilename_dynamut = paste0("~/git/Data/", drug, "/output/dynamut_results/", gene
@ -98,10 +100,15 @@ infilename_dynamut2 = paste0("~/git/Data/", drug, "/output/dynamut_results/dyna
infilename_mcsm_na = paste0("~/git/Data/", drug, "/output/mcsm_na_results/", gene
, "_complex_mcsm_na_norm.csv")
infilename_mcsm_f_snps <- paste0("~/git/Data/", drug, "/output/", gene
, "_mcsm_formatted_snps.csv")
dynamut_df = read.csv(infilename_dynamut)
dynamut2_df = read.csv(infilename_dynamut2)
mcsm_na_df = read.csv(infilename_mcsm_na)
mcsm_f_snps = read.csv(infilename_mcsm_f_snps, header = F)
names(mcsm_f_snps) = "mutationinformation"
####################################################################
# Data for subcols barplot (~heatmap)
@ -430,11 +437,17 @@ if (nrow(corr_ps_df3) == nrow(merged_df3) && nrow(merged_df3_comp) == check1) {
, "\nGot: ", check1)
}
rm(foo)
####################################################################
# Data for DM OM Plots: Long format dfs
####################################################################
source("other_plots_data.R")
########################################################################
# End of script
########################################################################
rm(foo)
cat("\n===================================================\n"
cat("\n######################################################\n"
, "\nSuccessful: get_plotting_dfs.R worked!"
, "\n====================================================")
, "\n###################################################\n")

View file

@ -3,10 +3,9 @@
# TASK: producing boxplots for dr and other muts
#########################################################
#=======================================================================
# working dir and loading libraries
# getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
# setwd("~/git/LSHTM_analysis/scripts/plotting")
# getwd()
# make cmd
@ -14,21 +13,21 @@ setwd("~/git/LSHTM_analysis/scripts/plotting")
# drug = "streptomycin"
# gene = "gid"
source("get_plotting_dfs.R")
# source("get_plotting_dfs.R")
#=======================================================================
# MOVE TO COMBINE or singular file for deepddg
#
# cols_to_select = c("mutation", "mutationinformation"
# , "wild_type", "position", "mutant_type"
# , "mutation_info")
#
# merged_df3_short = merged_df3[, cols_to_select]
cols_to_select = c("mutation", "mutationinformation"
, "wild_type", "position", "mutant_type"
, "mutation_info")
merged_df3_short = merged_df3[, cols_to_select]
infilename_mcsm_f_snps <- paste0("~/git/Data/", drug, "/output/", gene
, "_mcsm_formatted_snps.csv")
mcsm_f_snps<- read.csv(infilename_mcsm_f_snps, header = F)
names(mcsm_f_snps) <- "mutationinformation"
# infilename_mcsm_f_snps <- paste0("~/git/Data/", drug, "/output/", gene
# , "_mcsm_formatted_snps.csv")
#
# mcsm_f_snps<- read.csv(infilename_mcsm_f_snps, header = F)
# names(mcsm_f_snps) <- "mutationinformation"
# write merged_df3 to generate structural figure on chimera
#write.csv(merged_df3_short, "merged_df3_short.csv")
@ -52,11 +51,11 @@ my_min = min(merged_df3$deepddg_scaled); my_min
my_max = max(merged_df3$deepddg_scaled); my_max
if (my_min == -1 && my_max == 1){
cat("PASS: DeepDDG successfully scaled b/w -1 and 1"
cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
#, "\nProceeding with assigning deep outcome category")
, "\n")
}else{
cat("FAIL: could not scale DeepDDG ddg values"
cat("\nFAIL: could not scale DeepDDG ddg values"
, "Aborting!")
}
@ -100,7 +99,7 @@ if (merging_cols == "mutationinformation") {
cols_check <- c(c1, c2, c3, c4)
expected_cols = n_comb_cols - ( length(cols_check) - 1)
if (all(cols_check)){
cat("\nStage 2:Proceeding with merging dfs:\n")
cat("\nStage 2: Proceeding with merging dfs:\n")
comb_df <- Reduce(inner_join, list(cols_mcsm_df
, cols_mcsm_na_df
, dynamut_df
@ -115,12 +114,13 @@ if (merging_cols == "mutationinformation") {
}
}
names(comb_df_s)
#names(comb_df_s)
cat("\n!!!IT GOT TO HERE!!!!")
#=======================================================================
fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
fact_cols
lapply(comb_df_s[, fact_cols], class)
comb_df_s[,fact_cols] <- lapply(comb_df_s[,cols],as.factor)
comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor)
if (any(lapply(comb_df_s[, fact_cols], class) == "character")){
cat("\nChanging cols to factor")
@ -512,7 +512,6 @@ rm(all_plot_dfs
, my_data_snp
, my_df
, my_df_u
, ols_mcsm_df
, other_muts
, pd_df
, subcols_df_ps