LSHTM_analysis/scripts/plotting/plotting_thesis/alr/prominent_effects_alr.R

########################################################
pos_colname = "position"

#-------------
# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
#-------------
length(all_stability_cols); length(raw_stability_cols)
length(scaled_stability_cols); length(outcome_stability_cols)
length(affinity_dist_colnames)


static_cols =  c("mutationinformation",
                 #"position",
                 pos_colname,
                 "sensitivity")

other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)

#omit avg cols and foldx_scaled_signC cols
other_cols =  other_cols_all[grep("avg", other_cols_all, invert = T)]
other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
other_cols

cols_to_extract =  c(static_cols, other_cols)
cat("\nExtracting cols:", cols_to_extract)
expected_ncols = length(static_cols) + length(other_cols)
expected_ncols

str_df = merged_df3[, cols_to_extract]

if (ncol(str_df) == expected_ncols){
  cat("\nPASS: successfully extracted cols for calculating prominent effects")
}else{
  stop("\nAbort: Could not extract cols for calculating prominent effects")
}

#=========================
# Masking affinity columns
#=========================
# First make values for affinity cols 0 when their corresponding dist >10
head(str_df)

# replace in place affinity values >10
str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0

#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
if (tolower(gene)%in%geneL_ppi2){
  str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
}

# na gene: replace in place na affinity values where na dist >10
if (tolower(gene)%in%geneL_na){
  str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0
}

colnames(str_df)
head(str_df)

scaled_cols_tc = other_cols[grep("scaled", other_cols)]


################################################
#===============
# whole df
#===============
give_col=function(x,y,df=str_df){
  df[df[[pos_colname]]==x,y]
}

for (i in unique(str_df[[pos_colname]]) ){
  print(i)
  #cat(length(unique(str_df[[pos_colname]])))

  biggest     = max(abs(give_col(i,scaled_cols_tc)))

  str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
  str_df[str_df[[pos_colname]]==i,'effect_type']= names(
    give_col(i,scaled_cols_tc)[which(
      abs(
        give_col(i,scaled_cols_tc)
      ) == biggest, arr.ind=T
    )[, "col"]])[1]

  effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values

  # get index/rowname for value of max effect, and then use it to get the original sign
  # here
  #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
  ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))

  str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
}

# ends with suffix 2 if dups
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
colnames(str_df)
table(str_df$effect_type)

# check
str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]

#================
# for Plots
#================
str_df_short = str_df[, c("mutationinformation",
                          #"position",
                          pos_colname,
                          "sensitivity"
                          , "effect_type"
                          , "effect_sign")]

table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)

# assign pe outcome
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)

#==============
# group effect type:
# lig, ppi2, nuc. acid, stability
#==============
affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")

#lig
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
                                     , "lig"
                                     , str_df_short$effect_type)
table(str_df_short$effect_grouped)

#ppi2
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
                                     , "ppi2"
                                     , str_df_short$effect_grouped)
table(str_df_short$effect_grouped)


#stability
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig",
                                                                       "ppi2"
                                                                       )
                                     , "stability"
                                     , str_df_short$effect_grouped)

table(str_df_short$effect_grouped)

# create a sign as well
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
                                     , str_df_short$effect_grouped)

table(str_df_short$pe_effect_outcome)

#####################################################################
# Chimera: for colouring
####################################################################

#-------------------------------------
# get df with unique position
#--------------------------------------
#data[!duplicated(data$x), ]
str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),]

if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
  cat("\nPASS: successfully extracted df with unique positions")
}else{
  stop("\nAbort: Could not extract df with unique positions")
}

#-------------------------------------
# generate colours for effect types
#--------------------------------------
str_df_plot_cols = str_df_plot[, c(pos_colname,
                                   "sensitivity",
                                   "pe_outcome",
                                   "effect_grouped",
                                   "pe_effect_outcome")]
head(str_df_plot_cols)

# colour intensity based on sign
#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")

table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
head(str_df_plot_cols)

# colour based on effect
table(str_df_plot_cols$pe_effect_outcome)

pe_colour_map = c("DD_lig"           = "#f0e68c"  # khaki
                  , "SS_lig"         = "#ffd700"  # gold

                  , "DD_nucleic_acid"= "#d2b48c"  # sandybrown
                  , "SS_nucleic_acid"= "#a0522d"  # sienna

                  , "DD_ppi2"        = "#da70d6"  # orchid
                  , "SS_ppi2"        = "#ff1493"  # deeppink

                  , "DD_stability"   = "#f8766d"  # red
                  , "SS_stability"   = "#00BFC4") # blue
#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)

#map the colours
str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome
                                 ,function(x){pe_colour_map[[x]]}
                                 ))
head(str_df_plot_cols$colour_map)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)

# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
#                                   , "_"
#                                   , str_df_plot_cols$colour_map)
# head(str_df_plot_cols$colours)
# table(str_df_plot_cols$colours)
#
#
# class(str_df_plot_cols$colour_map)
# str(str_df_plot_cols)

# sort by colour
head(str_df_plot_cols)
str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
head(str_df_plot_cols)

#======================================
# write file with prominent effects
#======================================
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))

################################
# printing for chimera
###############################
chain_suffix = ".A"
str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)

#===================================================
#-------------------
# Ligand Affinity
#-------------------
# -ve Lig Aff
dd_lig        = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
  dd_lig_pos  = dd_lig[[pos_colname]]
}else{
  stop("Abort: DD affinity colour numbers mismtatch")
  print(toString(paste0(dd_lig_pos, chain_suffix)))

}

# +ve Lig Aff
ss_lig            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
if (!empty(ss_lig)){
  if (nrow(ss_lig)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
    ss_lig_pos      = ss_lig[[pos_colname]]
  }else{
    stop("Abort: SS affinity colour numbers mismtatch")
  }
  #put in chimera cmd
  print(toString(paste0(ss_lig_pos, chain_suffix)))
}

#===================================================
#-------------------
# PPI2 Affinity
#-------------------
# -ve PPI2
dd_ppi2           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",]
if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){
  dd_ppi2_pos     =  dd_ppi2[[pos_colname]]
}else{
  stop("Abort: DD PPI2 colour numbers mismtatch")
  print(toString(paste0(dd_ppi2_pos,chain_suffix)))

}

# +ve PPI2
ss_ppi2            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",]
if (nrow(ss_ppi2)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){
  ss_ppi2_pos      = ss_ppi2[[pos_colname]]
}else{
  stop("Abort: SS PPI2 colour numbers mismtatch")
  print(toString(paste0(ss_ppi2_pos,chain_suffix)))

}

#put in chimera cmd

#=========================================================
#------------------------
# Stability
#------------------------
# -ve Stability
dd_stability           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
  dd_stability_pos     =  dd_stability[[pos_colname]]
}else{
  stop("Abort: DD Stability colour numbers mismtatch")
}

# +ve Stability
ss_stability            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
if (nrow(ss_stability)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
  ss_stability_pos      = ss_stability[[pos_colname]]
}else{
  stop("Abort: SS Stability colour numbers mismtatch")
}

# put in chimera cmd
print(toString(paste0(dd_stability_pos, chain_suffix)))
print(toString(paste0(ss_stability_pos, chain_suffix)))

####################################################################