renamed 2 to _v2

This commit is contained in:
Tanushree Tunstall 2022-08-22 10:53:25 +01:00
parent 802d6f8495
commit 8d6c148fff
7 changed files with 74 additions and 588 deletions

View file

@ -53,6 +53,7 @@ if (!exists("infile_params") && exists("gene")){
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df = read.csv(infile_params, header = T)
pd_df = plotting_data(mcsm_df
, gene = gene # ADDED
, lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff)
@ -87,6 +88,7 @@ cat("\nDim of meta data file: ", dim(gene_metadata))
all_plot_dfs = combining_dfs_plotting(my_df_u
, gene_metadata
, gene = gene # ADDED
, lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff)

View file

@ -92,8 +92,8 @@ if (tolower(gene)%in%geneL_na){
naDist_colname,
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome")
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity")
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled")
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity")
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled")
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname, naDist_colname)

View file

@ -30,8 +30,8 @@
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above

View file

@ -1,584 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots for mCSM DUET, ligand affinity, and foldX
# basic barplots with count of mutations
# basic barplots with frequency of count of mutations
# , df_colname = ""
# , leg_title = ""
# , ats = 25 # axis text size
# , als = 22 # axis label size
# , lts = 20 # legend text size
# , ltis = 22 # label title size
# , geom_ls = 10 # geom_label size
# , yaxis_title = "Number of nsSNPs"
# , bp_plot_title = ""
# , label_categories = c("Destabilising", "Stabilising")
# , title_colour = "chocolate4"
# , subtitle_text = NULL
# , sts = 20
# , subtitle_colour = "pink"
# #, leg_position = c(0.73,0.8) # within plot area
# , leg_position = "top"
# , bar_fill_values = c("#F8766D", "#00BFC4")
#########################################################
#=============
# Data: Input
#==============
# Gene selection: exactly one config file is sourced at a time; the
# commented-out lines are the alternatives.
# NOTE(review): the config is presumed to define `gene` (used for the output
# path below) and other globals -- confirm against config/*.R.
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# NOTE(review): get_plotting_dfs.R is presumed to create merged_df3 and
# plotting_cols, both of which are read below -- confirm.
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
# sanity check
cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
# Coerce merged_df3 to a plain data.frame; class() is echoed before and after
# so the conversion is visible in the log.
class(merged_df3)
merged_df3 = as.data.frame(merged_df3)
class(merged_df3)
# Rename the column "pos_count" to "df2_pos_count_all".
# The head() calls echo the column before and after the rename.
head(merged_df3$pos_count)
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
head(merged_df3$pos_count)
head(merged_df3$df2_pos_count_all)
# DROP pos_count column
# (after the rename above no column should still be called "pos_count",
#  so this filter acts as a safety net and head() below should print NULL)
# merged_df3$pos_count <-NULL
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
head(merged_df3$pos_count)
# Keep only the columns listed in plotting_cols for plotting.
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
# Echo whether the "nca_distance" column survived the column selection.
"nca_distance"%in%colnames(df3)
#=======
# output
#=======
# Per-gene image directory; the path is only built and reported here.
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
# Echo the distance cutoff and the distance column names in use
# (all four are expected to come from the sourced globals/config).
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
# Subset df3 to the rows lying closer than DistCutOff to the ligand, the
# PP-interface and the nucleic acid respectively.
# which() is used on purpose: indexing rows with a logical vector that
# contains NA (i.e. a missing distance) returns all-NA rows, which would then
# be counted in the barplots. which() keeps only the rows where the comparison
# is TRUE. A distance column that is absent (NULL) still gives a 0-row subset.
df3_lig  = df3[which(df3[[LigDist_colname]]  < DistCutOff), ]
df3_ppi2 = df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
df3_na   = df3[which(df3[[naDist_colname]]   < DistCutOff), ]
# Shared plot title fragment, e.g. "Sites <10" followed by the angstrom symbol.
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
#----------------------------------------------------------
# Ligand affinity barplots, drawn from df3_lig
#   mLigP  : mCSM-lig outcome   ("ligand_outcome")
#   mmLigP : mmCSM-lig outcome  ("mmcsm_lig_outcome")
# Both use the same set of sites; only the predicted effect differs.
# leg_title, label_categories and bp_plot_title are deliberately not passed.
#----------------------------------------------------------
mLigP <- stability_count_bp(
  plotdf          = df3_lig,
  df_colname      = "ligand_outcome",
  subtitle_text   = "mCSM-lig",
  subtitle_colour = "black",
  yaxis_title     = "Number of nsSNPs",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
mLigP

# Same panel for mmCSM-lig; the y-axis title is blank.
mmLigP <- stability_count_bp(
  plotdf          = df3_lig,
  df_colname      = "mmcsm_lig_outcome",
  subtitle_text   = "mmCSM-lig",
  subtitle_colour = "black",
  yaxis_title     = "",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
mmLigP
#----------------------------------------------------------
# PP-interface affinity barplot (mCSM-ppi2), drawn from df3_ppi2.
# Only built for genes listed in geneL_ppi2.
#----------------------------------------------------------
if (tolower(gene) %in% geneL_ppi2) {
  ppi2P <- stability_count_bp(
    plotdf          = df3_ppi2,
    df_colname      = "mcsm_ppi2_outcome",
    subtitle_text   = "mCSM-ppi2",
    subtitle_colour = "black",
    yaxis_title     = "Number of nsSNPs",
    leg_position    = "none",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    sts     = 10,
    lts     = 8,
    ats     = 12,
    als     = 11,
    ltis    = 11,
    geom_ls = 2.5
  )
  ppi2P
}
#----------------------------------------------------------
# Nucleic-acid affinity barplot (mCSM-NA), drawn from df3_na.
# Only built for genes listed in geneL_na.
#----------------------------------------------------------
if (tolower(gene) %in% geneL_na) {
  nca_distP <- stability_count_bp(
    plotdf          = df3_na,
    df_colname      = "mcsm_na_outcome",
    subtitle_text   = "mCSM-NA",
    subtitle_colour = "black",
    yaxis_title     = "Number of nsSNPs",
    leg_position    = "none",
    bar_fill_values = c("#F8766D", "#00BFC4"),
    sts     = 10,
    lts     = 8,
    ats     = 12,
    als     = 11,
    ltis    = 11,
    geom_ls = 2.5
  )
  nca_distP
}
#####################################################################
# Site-level SNP count barplots (number of sites vs number of nsSNPs),
# one per distance-filtered subset:
#   posC_lig  : df3_lig
#   posC_ppi2 : df3_ppi2 (genes in geneL_ppi2 only)
#   posC_nca  : df3_na   (genes in geneL_na only)
#####################################################################
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)

# --- ligand ---
posC_lig <- site_snp_count_bp(
  plotdf          = df3_lig,
  df_colname      = "position",
  xaxis_title     = "Number of nsSNPs",
  yaxis_title     = "Number of Sites",
  subtitle_text   = "",
  subtitle_colour = "chocolate4",
  subtitle_size   = 8,
  geom_ls         = 2.6,
  leg_text_size   = 10,
  axis_text_size  = 10,
  axis_label_size = 10
)
posC_lig

# --- PP-interface ---
if (tolower(gene) %in% geneL_ppi2) {
  posC_ppi2 <- site_snp_count_bp(
    plotdf          = df3_ppi2,
    df_colname      = "position",
    xaxis_title     = "Number of nsSNPs",
    yaxis_title     = "Number of Sites",
    subtitle_text   = "",
    subtitle_colour = "chocolate4",
    subtitle_size   = 8,
    geom_ls         = 2.6,
    leg_text_size   = 10,
    axis_text_size  = 10,
    axis_label_size = 10
  )
  posC_ppi2
}

# --- nucleic acid ---
if (tolower(gene) %in% geneL_na) {
  posC_nca <- site_snp_count_bp(
    plotdf          = df3_na,
    df_colname      = "position",
    xaxis_title     = "Number of nsSNPs",
    yaxis_title     = "Number of Sites",
    subtitle_text   = "",
    subtitle_colour = "chocolate4",
    subtitle_size   = 8,
    geom_ls         = 2.6,
    leg_text_size   = 10,
    axis_text_size  = 10,
    axis_label_size = 10
  )
  posC_nca
}
#===============================================================
# PE count
# Six coloured tiles, one per effect class, each labelled with the class name
# and its mutation count.
#   peP  : tiles stacked vertically (flipped coordinates, x reversed)
#   peP2 : tiles laid out horizontally
#===============================================================
rects <- data.frame(x = 1:6,
                    colors = c("#ffd700"    # gold
                               , "#f0e68c"  # khaki
                               , "#da70d6"  # orchid
                               , "#ff1493"  # deeppink
                               , "#00BFC4"  #, "#007d85" # blue
                               , "#F8766D") # red
)
rects
rects$text = c("-ve Lig"
               , "+ve Lig"
               , "+ve PPI2"
               , "-ve PPI2"
               , "+ve stability"
               , "-ve stability")
# FOR EMBB ONLY
# These counts are hard-coded and are only valid for embb. Warn loudly when
# the script is run for any other gene so that stale numbers are not plotted
# unnoticed.
# NOTE(review): assumes the embb config sets gene such that
# tolower(gene) == "embb" -- confirm.
if (!exists("gene") || tolower(gene) != "embb") {
  warning("PE tile counts (rects$numbers) are hard-coded for embb; update them for the current gene")
}
rects$numbers = c(38, 0, 22, 9, 108, 681)
rects$num_labels = paste0("n=", rects$numbers)
rects
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
# Note: a ggplot has exactly one coordinate system. The original code added
# coord_fixed() and then coord_flip(); the latter replaced the former (with a
# "Coordinate system already present" message), so only coord_flip() is kept.
peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
  geom_tile(width = 1, height = 1) +         # one unit-sized tile per class
  geom_text(color = "black", size = 1.7) +   # black label in the middle of each tile
  scale_fill_identity(guide = "none") +      # fill tiles with the colours stored in the data frame
  coord_flip() + scale_x_reverse() +         # stack the tiles top-to-bottom in row order
  # theme_void() # remove any axis markings
  theme_nothing()                            # remove any axis markings
peP
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
  geom_tile() +                              # default-sized tiles
  geom_text(color = "black", size = 1.6) +   # black label in the middle of each tile
  scale_fill_identity(guide = "none") +      # fill tiles with the colours stored in the data frame
  coord_fixed() +                            # fixed aspect ratio so tiles are square
  theme_nothing()                            # remove any axis markings
peP2
# ----------------------------------------------------------
# Site-level SNP count barplot for ALL mutation sites
# (uses the full df3, i.e. no distance filtering)
# ----------------------------------------------------------
posC_all <- site_snp_count_bp(
  plotdf          = df3,
  df_colname      = "position",
  xaxis_title     = "Number of nsSNPs",
  yaxis_title     = "Number of Sites",
  subtitle_text   = "All mutations sites",
  subtitle_colour = "chocolate4",
  subtitle_size   = 8,
  geom_ls         = 2.6,
  leg_text_size   = 10,
  axis_text_size  = 10,
  axis_label_size = 10
)
posC_all
##################################################################
#------------------------------
# barplot for sensitivity:
# (disabled; sensitivity is shown as count tiles further below)
#------------------------------
# sensP = stability_count_bp(plotdf = df3
# , df_colname = "sensitivity"
# #, leg_title = "mCSM-ppi2"
# #, label_categories = labels_ppi2
# #, bp_plot_title = paste(common_bp_title, "PP-interface")
#
# , yaxis_title = "Number of nsSNPs"
# , leg_position = "none"
# , subtitle_text = "Sensitivity"
# , bar_fill_values = c("red", "blue")
# , subtitle_colour= "black"
# , sts = 10
# , lts = 8
# , ats = 8
# , als =8
# , ltis = 11
# , geom_ls =2
# )
#------------------------------
# barplot for ConSurf outcome (all mutations in df3);
# this is the only panel here that shows its legend (on top).
#------------------------------
consurfP <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "consurf_outcome",
  subtitle_text   = "ConSurf",
  subtitle_colour = "black",
  yaxis_title     = "Number of nsSNPs",
  leg_position    = "top",
  bar_fill_values = consurf_colours, # from globals
  sts     = 10,
  lts     = 8,
  ats     = 8,
  als     = 8,
  ltis    = 11,
  geom_ls = 2
)
consurfP
####################
# Sensitivity count: Mutations
# Two coloured tiles labelled with the number of mutations in each
# sensitivity class. The first table entry is shown as "Resistant" (red),
# the second as "Sensitive" (blue).
####################
table(df3$sensitivity)
rect_sens <- data.frame(
  mutation_class = c("Resistant", "Sensitive"),
  tile_colour    = c("red", "blue"),
  numbers        = c(table(df3$sensitivity)[[1]],
                     table(df3$sensitivity)[[2]])
)
sensP <- ggplot(
  rect_sens,
  aes(mutation_class, y = 0, fill = tile_colour, label = paste0("n=", numbers))
) +
  # unit-sized tile per class
  geom_tile(width = 1, height = 1) +
  # black count label on a semi-transparent white box
  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
  # use the colours stored in the data frame as-is, without a legend
  scale_fill_identity(guide = "none") +
  # fixed aspect ratio so the tiles stay square
  coord_fixed() +
  #coord_flip()+ scale_x_reverse() +
  # theme_void() # remove any axis markings
  # strip all axis markings
  theme_nothing()
sensP
# sensP2 = sensP +
# coord_flip() + scale_x_reverse()
# sensP2
#===============================
# Sensitivity count: Site
# Three tiles with the number of sites that carry only resistant mutations,
# both resistant and sensitive mutations ("common"), or only sensitive
# mutations.
#==============================
table(df3$sensitivity)
#--------
# embb
#--------
#rsc = 54
#ccc = 46
#ssc = 470
# rsc / ccc / ssc used to be typed in by hand per gene (see the embb values
# above); with those lines commented out the data.frame() call below failed
# with "object 'rsc' not found". If the three counts are not already defined
# (e.g. by a config file), derive them from df3 instead.
# NOTE(review): the derivation assumes, as the mutation-level tiles do, that
# the first level of df3$sensitivity is the resistant class and the second the
# sensitive class, and that df3$position identifies a site -- confirm.
if (!all(vapply(c("rsc", "ccc", "ssc"), exists, logical(1), mode = "numeric"))) {
  sens_levels = names(table(df3$sensitivity))
  res_sites = unique(df3$position[which(df3$sensitivity == sens_levels[1])])
  sen_sites = unique(df3$position[which(df3$sensitivity == sens_levels[2])])
  rsc = length(setdiff(res_sites, sen_sites))    # resistant-only sites
  ccc = length(intersect(res_sites, sen_sites))  # sites with both classes
  ssc = length(setdiff(sen_sites, res_sites))    # sensitive-only sites
  cat("\nDerived site counts from df3:"
      , "\nResistant-only sites:", rsc
      , "\nCommon sites:", ccc
      , "\nSensitive-only sites:", ssc, "\n")
}
rect_rs_siteC = data.frame(mutation_class = c("A_Resistant sites"
                                              , "B_Common sites"
                                              , "C_Sensitive sites"),
                           tile_colour = c("red",
                                           "purple",
                                           "blue"),
                           numbers = c(rsc, ccc, ssc),
                           order = c(1, 2, 3))
rect_rs_siteC$labels = paste0(rect_rs_siteC$mutation_class, "\nn=", rect_rs_siteC$numbers)
# The A_/B_/C_ prefixes fix the left-to-right tile order (alphabetical x axis).
sens_siteP = ggplot(rect_rs_siteC, aes(mutation_class, y = 0,
                                       fill = tile_colour,
                                       label = paste0("n=", numbers))) +
  geom_tile(width = 1, height = 1) +
  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
  theme_nothing()
sens_siteP
##############################################################
#===================
# Stability
# Count barplots over all mutations in df3, one per stability predictor.
# Only the first panel (DUET) carries a y-axis title.
#===================
# mCSM-DUET (the only call that sets leg_title and subtitle_colour)
duetP <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "duet_outcome",
  leg_title       = "mCSM-DUET",
  subtitle_text   = "mCSM-DUET",
  subtitle_colour = "black",
  yaxis_title     = "Number of nsSNPs",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
duetP

# FoldX
foldxP <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "foldx_outcome",
  subtitle_text   = "FoldX",
  yaxis_title     = "",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
foldxP

# DeepDDG
deepddgP <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "deepddg_outcome",
  subtitle_text   = "DeepDDG",
  yaxis_title     = "",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
deepddgP

# Dynamut2
dynamut2P <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "ddg_dynamut2_outcome",
  subtitle_text   = "Dynamut2",
  yaxis_title     = "",
  leg_position    = "none",
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
dynamut2P
# Sequence-based effect predictors, plotted over all mutations in df3 with a
# pink palette (deep pink / light pink) instead of the red/blue one.

# PROVEAN
proveanP <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "provean_outcome",
  subtitle_text   = "PROVEAN",
  yaxis_title     = "Number of nsSNPs",
  leg_position    = "none", # top
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
proveanP

# SNAP2
snap2P <- stability_count_bp(
  plotdf          = df3,
  df_colname      = "snap2_outcome",
  subtitle_text   = "SNAP2",
  yaxis_title     = "",
  leg_position    = "none", # top
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
  sts     = 10,
  lts     = 8,
  ats     = 12,
  als     = 11,
  ltis    = 11,
  geom_ls = 2.5
)
snap2P
##############################################################
##############################
# FIXME for other genes: ATTEMPTED to derive numbers
##############################
#
# table(str_df_short$pe_effect_outcome)
# # extract the numbers
# DD_lig_n = table(str_df_short$pe_effect_outcome)[[1]]
# SS_lig_n = 0
# DD_ppi2_n = table(str_df_short$pe_effect_outcome)[[2]]
# SS_ppi2_n = table(str_df_short$pe_effect_outcome)[[4]]
# DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
# SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
#
# nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
#
# rect_pe = data.frame(x = 1:6
# , pe_effect_type=c("-ve Lig aff"
# , "+ve Lig aff"
# , "-ve PPI2 aff"
# , " +ve PPI2 aff"
# , "-ve stability"
# , "+ve stability")
#
# , tile_colour =c("#ffd700" #gold
# ,"#f0e68c" # khaki
# , "#ff1493" #deeppink
# , "#da70d6" #orchid
# , "#F8766D" # Sred
# , "#00BFC4") #Sblue
# # , numbers = c(DD_lig_n
# # , SS_lig_n
# # , DD_ppi2_n
# # , SS_ppi2_n
# # , DD_stability_n
# # , SS_stability_n )
# , numbers = nums
# )
#
# rect_pe$num_labels = paste0("n=", rect_pe$numbers)
# rect_pe
#
# # create plot
# peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile(width = 1, height = 1) + # make square tiles
# geom_text(color = "black", size = 1.7) + # add white text in the middle
# scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
# coord_fixed() + # make sure tiles are square
# coord_flip()+ scale_x_reverse() +
# # theme_void() # remove any axis markings
# theme_nothing() # remove any axis markings
# peP
#
# peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile() +
# geom_text(color = "black", size = 1.6) +
# scale_fill_identity(guide = "none") +
# coord_fixed() +
# theme_nothing()
# peP2