more plot files
This commit is contained in:
parent
04253b961f
commit
3d817fde0c
29 changed files with 3252 additions and 760 deletions
331
scripts/plotting/plotting_thesis/alr/prominent_effects_alr.R
Normal file
331
scripts/plotting/plotting_thesis/alr/prominent_effects_alr.R
Normal file
|
@ -0,0 +1,331 @@
|
|||
########################################################
|
||||
# Name of the column holding the residue position; the per-position lookups
# further down all go through this variable.
pos_colname <- "position"

#-------------
# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
#-------------
# Quick look at the sizes of the column-name vectors defined in that file.
length(all_stability_cols); length(raw_stability_cols)
length(scaled_stability_cols); length(outcome_stability_cols)
length(affinity_dist_colnames)

# Identifier columns that are carried through unchanged.
static_cols <- c("mutationinformation",
                 #"position",
                 pos_colname,
                 "sensitivity")

other_cols_all <- c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)

# omit avg cols and foldx_scaled_signC cols
other_cols <- other_cols_all[!grepl("avg", other_cols_all)]
other_cols <- other_cols[!grepl("foldx_scaled_signC", other_cols)]
other_cols

cols_to_extract <- c(static_cols, other_cols)
cat("\nExtracting cols:", cols_to_extract)
expected_ncols <- length(static_cols) + length(other_cols)
expected_ncols

# Validate BEFORE subsetting: indexing a data frame with an unknown column
# name fails with "undefined columns selected", so without this check the
# Abort message below could never be reached.
missing_cols <- setdiff(cols_to_extract, colnames(merged_df3))
if (length(missing_cols) > 0) {
  stop("\nAbort: Could not extract cols for calculating prominent effects",
       "\nMissing cols: ", paste(missing_cols, collapse = ", "))
}

str_df <- merged_df3[, cols_to_extract]

if (ncol(str_df) == expected_ncols) {
  cat("\nPASS: successfully extracted cols for calculating prominent effects")
} else {
  stop("\nAbort: Could not extract cols for calculating prominent effects")
}
|
||||
|
||||
#=========================
|
||||
# Masking affinity columns
|
||||
#=========================
|
||||
# Set each scaled affinity column to 0 on the rows whose corresponding
# distance column exceeds the cutoff.
head(str_df)

# Distance above which an affinity value is zeroed (same units as the
# distance columns).
affinity_dist_cutoff <- 10

# which() keeps only the rows where the comparison is TRUE. A bare logical
# mask would carry NA for rows with a missing distance, and data-frame
# subscripted assignment refuses NA in the row index. Rows with a missing
# distance are therefore left untouched.
far_from_ligand <- which(str_df[, "ligand_distance"] > affinity_dist_cutoff)
str_df[far_from_ligand, "affinity_scaled"] <- 0
str_df[far_from_ligand, "mmcsm_lig_scaled"] <- 0

# ppi2 gene: replace in place ppi2 affinity values where ppi2 dist > cutoff
if (tolower(gene) %in% geneL_ppi2) {
  far_from_interface <- which(str_df[, "interface_dist"] > affinity_dist_cutoff)
  str_df[far_from_interface, "mcsm_ppi2_scaled"] <- 0
}

# na gene: replace in place na affinity values where na dist > cutoff
if (tolower(gene) %in% geneL_na) {
  far_from_na <- which(str_df[, "nca_distance"] > affinity_dist_cutoff)
  str_df[far_from_na, "mcsm_na_scaled"] <- 0
}

colnames(str_df)
head(str_df)

# Columns that compete for the prominent effect: every extracted column whose
# name contains "scaled".
scaled_cols_tc <- other_cols[grep("scaled", other_cols)]
|
||||
|
||||
|
||||
################################################
|
||||
#===============
|
||||
# whole df
|
||||
#===============
|
||||
# Return the rows of `df` at residue position `x`, restricted to column(s) `y`.
#
#   x  : position value, matched against df[[pos_colname]]
#   y  : name(s) of the column(s) to return
#   df : data frame to search (defaults to the global str_df)
#
# The result is whatever `[` yields for that selection: a data frame when
# several columns are requested, a plain vector for a single column.
give_col <- function(x, y, df = str_df) {
  # which() discards NA comparisons; a logical mask containing NA would add
  # an all-NA row to the result for every row with a missing position.
  at_position <- which(df[[pos_colname]] == x)
  df[at_position, y]
}
|
||||
|
||||
# For every residue position, find the scaled effect with the largest
# absolute value among all rows at that position and store, on every row of
# that position:
#   abs_max_effect : the largest absolute scaled value
#   effect_type    : name of the column that value came from
#   effect_sign    : sign() of the original (signed) value
# Ties are resolved in the order which(arr.ind = TRUE) reports them:
# lowest column first, then lowest row within that column.

# Create the result columns up front so positions that cannot be ranked
# simply stay NA.
str_df[["abs_max_effect"]] <- NA_real_
str_df[["effect_type"]] <- NA_character_
str_df[["effect_sign"]] <- NA_real_

for (i in unique(str_df[[pos_colname]])) {
  print(i)

  # which() ignores NA positions instead of producing NA row indices.
  pos_rows <- which(str_df[[pos_colname]] == i)

  # drop = FALSE keeps a matrix even when there is a single scaled column.
  effects <- as.matrix(str_df[pos_rows, scaled_cols_tc, drop = FALSE])
  abs_effects <- abs(effects)

  # Nothing to rank: no rows, or every scaled value missing.
  if (length(pos_rows) == 0 || all(is.na(abs_effects))) {
    next
  }

  # na.rm so one missing predictor does not blank out the whole position.
  biggest <- max(abs_effects, na.rm = TRUE)
  first_hit <- which(abs_effects == biggest, arr.ind = TRUE)[1, ]

  str_df[pos_rows, "abs_max_effect"] <- biggest
  str_df[pos_rows, "effect_type"] <- colnames(effects)[first_hit[["col"]]]
  # Look the winning cell up again in the signed matrix to recover its sign.
  str_df[pos_rows, "effect_sign"] <- sign(effects[first_hit[["row"]], first_hit[["col"]]])
}
|
||||
|
||||
# Cull the numeric ".<n>" suffix that marks a duplicated effect type (these
# arise when there are exact duplicate values). The pattern is anchored at
# the end of the name so a ".<digits>" in the middle of a genuine column
# name is left alone.
str_df$effect_type <- sub("\\.[0-9]+$", "", str_df$effect_type)
colnames(str_df)
table(str_df$effect_type)

# check: pull out a handful of positions for manual inspection
str_df_check <- str_df[str_df[[pos_colname]] %in% c(24, 32, 160, 303, 334), ]
|
||||
|
||||
#================
|
||||
# for Plots
|
||||
#================
|
||||
# Reduce the table to the identifier columns plus the prominent-effect type
# and sign.
short_cols <- c("mutationinformation",
                #"position",
                pos_colname,
                "sensitivity",
                "effect_type",
                "effect_sign")
str_df_short <- str_df[, short_cols]

table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)

# assign pe outcome: a negative sign becomes "DD", zero or positive "SS"
sign_is_negative <- str_df_short$effect_sign < 0
str_df_short$pe_outcome <- ifelse(sign_is_negative, "DD", "SS")
table(str_df_short$pe_outcome)
table(str_df_short$effect_sign)
|
||||
|
||||
#==============
|
||||
# group effect type:
|
||||
# lig, ppi2, nuc. acid, stability
|
||||
#==============
|
||||
# Collapse the per-column effect types into the groups used for colouring:
# "lig", "ppi2", "nucleic_acid" and "stability".
affcols <- c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols <- c("mcsm_ppi2_scaled")
# Nucleic-acid affinity gets its own group so it matches the
# "DD_nucleic_acid" / "SS_nucleic_acid" entries of the colour map instead of
# falling through to "stability".
na_cols <- c("mcsm_na_scaled")

table(str_df_short$effect_type)

# stability is the catch-all: anything not claimed by an affinity group
# below (including a missing effect_type) ends up here.
str_df_short$effect_grouped <- rep("stability", nrow(str_df_short))

#lig
str_df_short$effect_grouped[str_df_short$effect_type %in% affcols] <- "lig"
table(str_df_short$effect_grouped)

#ppi2
str_df_short$effect_grouped[str_df_short$effect_type %in% ppi2_cols] <- "ppi2"
table(str_df_short$effect_grouped)

#nucleic acid
str_df_short$effect_grouped[str_df_short$effect_type %in% na_cols] <- "nucleic_acid"
table(str_df_short$effect_grouped)

# create a sign as well: "<pe_outcome>_<effect_grouped>", e.g. "DD_lig"
str_df_short$pe_effect_outcome <- paste0(str_df_short$pe_outcome, "_",
                                         str_df_short$effect_grouped)

table(str_df_short$pe_effect_outcome)
|
||||
|
||||
#####################################################################
|
||||
# Chimera: for colouring
|
||||
####################################################################
|
||||
|
||||
#-------------------------------------
|
||||
# get df with unique position
|
||||
#--------------------------------------
|
||||
#data[!duplicated(data$x), ]
|
||||
# Keep the first row encountered for each position. The duplicate mask is
# built from str_df_short itself (not from str_df), so the mask and the
# frame it indexes can never get out of step.
first_row_per_position <- !duplicated(str_df_short[[pos_colname]])
str_df_plot <- str_df_short[first_row_per_position, ]

if (nrow(str_df_plot) == length(unique(str_df_short[[pos_colname]]))) {
  cat("\nPASS: successfully extracted df with unique positions")
} else {
  stop("\nAbort: Could not extract df with unique positions")
}
|
||||
|
||||
#-------------------------------------
|
||||
# generate colours for effect types
|
||||
#--------------------------------------
|
||||
# Columns needed to derive the colouring, one row per position.
colour_input_cols <- c(pos_colname,
                       "sensitivity",
                       "pe_outcome",
                       "effect_grouped",
                       "pe_effect_outcome")
str_df_plot_cols <- str_df_plot[, colour_input_cols]
head(str_df_plot_cols)

# colour intensity based on sign: "DD" rows are "bright", the rest "light"
#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
outcome_is_dd <- str_df_plot_cols$pe_outcome == "DD"
str_df_plot_cols$colour_hue <- ifelse(outcome_is_dd, "bright", "light")

table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
head(str_df_plot_cols)
|
||||
|
||||
# colour based on effect
table(str_df_plot_cols$pe_effect_outcome)

# colors = c("#ffd700" #gold
#            , "#f0e68c" #khaki
#            , "#da70d6"# orchid
#            , "#ff1493"# deeppink
#            , "#a0522d" #sienna
#            , "#d2b48c" # tan
#            , "#00BFC4" #, "#007d85" #blue
#            , "#F8766D" )# red

# Lookup from "<pe_outcome>_<effect_grouped>" to a hex colour.
pe_colour_map <- c("DD_lig" = "#ffd700"   # gold
                   , "SS_lig" = "#f0e68c" # khaki

                   , "DD_nucleic_acid" = "#a0522d" # sienna
                   , "SS_nucleic_acid" = "#d2b48c" # tan

                   , "DD_ppi2" = "#da70d6" # orchid
                   , "SS_ppi2" = "#ff1493" # deeppink

                   , "DD_stability" = "#f8766d"  # red
                   , "SS_stability" = "#00BFC4") # blue

#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)

#map the colours
# Fail with a readable message if an outcome has no colour, rather than the
# bare "subscript out of bounds" a per-element [[ ]] lookup would give.
outcome_keys <- as.character(str_df_plot_cols$pe_effect_outcome)
unmapped_outcomes <- setdiff(unique(outcome_keys), names(pe_colour_map))
if (length(unmapped_outcomes) > 0) {
  stop("Abort: no colour defined for pe_effect_outcome: ",
       paste(unmapped_outcomes, collapse = ", "))
}

# Indexing a named vector by a character vector is already vectorised;
# unname() leaves a plain character column.
str_df_plot_cols$colour_map <- unname(pe_colour_map[outcome_keys])

head(str_df_plot_cols$colour_map)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)
|
||||
|
||||
# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
|
||||
# , "_"
|
||||
# , str_df_plot_cols$colour_map)
|
||||
# head(str_df_plot_cols$colours)
|
||||
# table(str_df_plot_cols$colours)
|
||||
#
|
||||
#
|
||||
# class(str_df_plot_cols$colour_map)
|
||||
# str(str_df_plot_cols)
|
||||
|
||||
# sort by colour
head(str_df_plot_cols)
colour_order <- order(str_df_plot_cols$colour_map)
str_df_plot_cols <- str_df_plot_cols[colour_order, ]
head(str_df_plot_cols)

#======================================
# write file with prominent effects
#======================================
# Output goes to a per-gene folder; the file name is prefixed with the
# lower-cased gene name.
gene_lc <- tolower(gene)
outdir_images <- paste0("~/git/Writing/thesis/images/results/", gene_lc, "/")
prominent_effects_csv <- paste0(outdir_images, gene_lc, "_prominent_effects.csv")
write.csv(str_df_plot_cols, prominent_effects_csv)
|
||||
|
||||
################################
|
||||
# printing for chimera
|
||||
###############################
|
||||
# Suffix appended to every position to build the "<position>.A" strings
# pasted into the chimera command.
chain_suffix <- ".A"
str_df_plot_cols$pos_chain <- paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)

#===================================================
#-------------------
# Ligand Affinity
#-------------------
# Counts are taken with sum(... %in% ...) rather than table(...)[["key"]]:
# the table lookup errors with "subscript out of bounds" when a category has
# no rows, whereas an absent category should just give an empty position set.

# -ve Lig Aff
is_dd_lig <- str_df_plot_cols$pe_effect_outcome %in% "DD_lig"
dd_lig <- str_df_plot_cols[is_dd_lig, ]
if (nrow(dd_lig) == sum(is_dd_lig)) {
  dd_lig_pos <- dd_lig[[pos_colname]]
} else {
  stop("Abort: DD affinity colour numbers mismtatch")
}

# +ve Lig Aff
is_ss_lig <- str_df_plot_cols$pe_effect_outcome %in% "SS_lig"
ss_lig <- str_df_plot_cols[is_ss_lig, ]
if (nrow(ss_lig) == sum(is_ss_lig)) {
  # Always defined, possibly empty, so later code can rely on it existing.
  ss_lig_pos <- ss_lig[[pos_colname]]
} else {
  stop("Abort: SS affinity colour numbers mismtatch")
}

#put in chimera cmd
# Kept at top level (not nested in the SS check) so both strings are shown.
# Skipped when empty because paste0() on a zero-length vector would yield a
# lone ".A".
if (length(dd_lig_pos) > 0) paste0(dd_lig_pos, chain_suffix)
if (length(ss_lig_pos) > 0) paste0(ss_lig_pos, chain_suffix)
|
||||
|
||||
|
||||
|
||||
|
||||
#===================================================
|
||||
#-------------------
|
||||
# PPI2 Affinity
|
||||
#-------------------
|
||||
# Counts are taken with sum(... %in% ...) rather than table(...)[["key"]]:
# the table lookup errors with "subscript out of bounds" when a category has
# no rows, whereas an absent category should just give an empty position set.

# -ve PPI2
is_dd_ppi2 <- str_df_plot_cols$pe_effect_outcome %in% "DD_ppi2"
dd_ppi2 <- str_df_plot_cols[is_dd_ppi2, ]
if (nrow(dd_ppi2) == sum(is_dd_ppi2)) {
  dd_ppi2_pos <- dd_ppi2[[pos_colname]]
} else {
  stop("Abort: DD PPI2 colour numbers mismtatch")
}

# +ve PPI2
is_ss_ppi2 <- str_df_plot_cols$pe_effect_outcome %in% "SS_ppi2"
ss_ppi2 <- str_df_plot_cols[is_ss_ppi2, ]
if (nrow(ss_ppi2) == sum(is_ss_ppi2)) {
  ss_ppi2_pos <- ss_ppi2[[pos_colname]]
} else {
  stop("Abort: SS PPI2 colour numbers mismtatch")
}

#put in chimera cmd
# Skipped when empty: paste0() on a zero-length vector would yield a lone
# chain suffix.
if (length(dd_ppi2_pos) > 0) paste0(dd_ppi2_pos, chain_suffix)
if (length(ss_ppi2_pos) > 0) paste0(ss_ppi2_pos, chain_suffix)
|
||||
|
||||
#=========================================================
|
||||
#------------------------
|
||||
# Stability
|
||||
#------------------------
|
||||
# Counts are taken with sum(... %in% ...) rather than table(...)[["key"]]:
# the table lookup errors with "subscript out of bounds" when a category has
# no rows, whereas an absent category should just give an empty position set.

# -ve Stability
is_dd_stability <- str_df_plot_cols$pe_effect_outcome %in% "DD_stability"
dd_stability <- str_df_plot_cols[is_dd_stability, ]
if (nrow(dd_stability) == sum(is_dd_stability)) {
  dd_stability_pos <- dd_stability[[pos_colname]]
} else {
  stop("Abort: DD Stability colour numbers mismtatch")
}

# +ve Stability
is_ss_stability <- str_df_plot_cols$pe_effect_outcome %in% "SS_stability"
ss_stability <- str_df_plot_cols[is_ss_stability, ]
if (nrow(ss_stability) == sum(is_ss_stability)) {
  ss_stability_pos <- ss_stability[[pos_colname]]
} else {
  stop("Abort: SS Stability colour numbers mismtatch")
}

#put in chimera cmd
# stabilising first as it has fewer positions
# Skipped when empty: paste0() on a zero-length vector would yield a lone
# chain suffix.
if (length(ss_stability_pos) > 0) paste0(ss_stability_pos, chain_suffix)
if (length(dd_stability_pos) > 0) paste0(dd_stability_pos, chain_suffix)
|
||||
####################################################################
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue