########################################################
# Prominent effects per position
#
# For every position, find the scaled stability/affinity column holding the
# largest absolute value across all mutations at that position, record which
# column it was (effect_type) and the sign of that value (effect_sign), group
# the effect into lig / ppi2 / nucleic_acid / stability, attach a colour,
# write one row per position to csv and print residue lists for Chimera.
#
# Objects used but not defined here (must exist before this script runs):
#   merged_df3, gene, geneL_ppi2, geneL_na, all_stability_cols,
#   raw_stability_cols, scaled_stability_cols, outcome_stability_cols,
#   scaled_affinity_cols, affinity_dist_colnames
########################################################
pos_colname = "position"

#-------------
# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
#-------------
length(all_stability_cols); length(raw_stability_cols)
length(scaled_stability_cols); length(outcome_stability_cols)
length(affinity_dist_colnames)

static_cols = c("mutationinformation",
                #"position",
                pos_colname,
                "sensitivity")

other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)

# omit avg cols and foldx_scaled_signC cols
other_cols = other_cols_all[grep("avg", other_cols_all, invert = TRUE)]
other_cols = other_cols[grep("foldx_scaled_signC", other_cols, invert = TRUE)]
other_cols

cols_to_extract = c(static_cols, other_cols)
cat("\nExtracting cols:", cols_to_extract)

expected_ncols = length(static_cols) + length(other_cols)
expected_ncols

str_df = merged_df3[, cols_to_extract]

if (ncol(str_df) == expected_ncols){
  cat("\nPASS: successfully extracted cols for calculating prominent effects")
}else{
  stop("\nAbort: Could not extract cols for calculating prominent effects")
}

#=========================
# Masking affinity columns
#=========================
# Affinity values are set to 0 when the corresponding distance is > 10
head(str_df)

# Set `aff_col` to 0 on every row of `df` where `dist_col` exceeds `cutoff`.
# which() discards NA comparisons, so rows with a missing distance are left
# untouched instead of triggering the "missing values are not allowed in
# subscripted assignments" error of a logical subscript containing NA.
# Returns the modified data frame.
mask_far_affinity = function(df, dist_col, aff_col, cutoff = 10){
  if (!dist_col %in% colnames(df)){
    stop("Abort: distance column not found: ", dist_col)
  }
  far_rows = which(df[[dist_col]] > cutoff)
  if (length(far_rows) > 0){
    df[far_rows, aff_col] = 0
  }
  df
}

# ligand affinity: mask where ligand distance > 10
str_df = mask_far_affinity(str_df, "ligand_distance", "affinity_scaled")
str_df = mask_far_affinity(str_df, "ligand_distance", "mmcsm_lig_scaled")

# ppi2 gene: mask ppi2 affinity values where ppi2 dist > 10
if (tolower(gene)%in%geneL_ppi2){
  str_df = mask_far_affinity(str_df, "interface_dist", "mcsm_ppi2_scaled")
}

# na gene: mask na affinity values where na dist > 10
if (tolower(gene)%in%geneL_na){
  str_df = mask_far_affinity(str_df, "nca_distance", "mcsm_na_scaled")
}

colnames(str_df)
head(str_df)

# columns competing for "prominent effect": every retained col containing "scaled"
scaled_cols_tc = other_cols[grep("scaled", other_cols)]

################################################
#===============
# whole df
#===============
# Rows of `df` at position `x`, restricted to column(s) `y`.
# Kept for backward compatibility; the loop below no longer needs it.
give_col=function(x,y,df=str_df){
  df[df[[pos_colname]]==x,y]
}

# pre-create result columns so their types are fixed up front
str_df$abs_max_effect = NA_real_
str_df$effect_type    = NA_character_
str_df$effect_sign    = NA_real_

for (i in unique(str_df[[pos_colname]]) ){
  print(i)

  pos_rows = which(str_df[[pos_colname]] == i)
  effects  = as.matrix(str_df[pos_rows, scaled_cols_tc, drop = FALSE])

  # which.max() scans column-major, skips NA and returns the FIRST maximum:
  # on ties the earliest column in scaled_cols_tc wins, and within it the
  # earliest row. This is the same pick the previous
  # which(abs(.) == max, arr.ind = TRUE)[1] logic made, without comparing
  # floating point values with '=='.
  top = which.max(abs(effects))
  if (length(top) == 0){
    stop("Abort: no non-missing scaled values at position: ", i)
  }
  top_value = effects[top]
  top_col   = arrayInd(top, dim(effects))[1, 2]

  str_df[pos_rows, "abs_max_effect"] = abs(top_value)
  str_df[pos_rows, "effect_type"]    = colnames(effects)[top_col]
  # original sign of the value that produced the max absolute effect
  str_df[pos_rows, "effect_sign"]    = sign(top_value)
}

# NOTE: effect_type is taken straight from colnames(), so no ".1"/".2"
# de-duplication suffixes can appear and no suffix stripping is needed.

colnames(str_df)
table(str_df$effect_type)

# check
str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]

#================
# for Plots
#================
str_df_short = str_df[, c("mutationinformation",
                          #"position",
                          pos_colname,
                          "sensitivity"
                          , "effect_type"
                          , "effect_sign")]

table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)

# assign pe outcome: negative sign -> "DD", otherwise "SS"
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)

#==============
# group effect type:
# lig, ppi2, nuc. acid, stability
#==============
affcols   = c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")
# nucleic acid affinity gets its own group (pe_colour_map below has
# DD_/SS_nucleic_acid entries); it used to fall through into "stability"
na_cols   = c("mcsm_na_scaled")

table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
                                     , "lig"
                                     , ifelse(str_df_short$effect_type%in%ppi2_cols
                                              , "ppi2"
                                              , ifelse(str_df_short$effect_type%in%na_cols
                                                       , "nucleic_acid"
                                                       , "stability")))
table(str_df_short$effect_grouped)

# combine sign and group, e.g. "DD_lig"
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
                                        , str_df_short$effect_grouped)
table(str_df_short$pe_effect_outcome)

#####################################################################
# Chimera: for colouring
####################################################################

#-------------------------------------
# get df with unique position
#--------------------------------------
#data[!duplicated(data$x), ]
str_df_plot = str_df_short[!duplicated(str_df_short[[pos_colname]]),]

if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
  cat("\nPASS: successfully extracted df with unique positions")
}else{
  stop("\nAbort: Could not extract df with unique positions")
}

#-------------------------------------
# generate colours for effect types
#--------------------------------------
str_df_plot_cols = str_df_plot[, c(pos_colname,
                                   "sensitivity",
                                   "pe_outcome",
                                   "effect_grouped",
                                   "pe_effect_outcome")]
head(str_df_plot_cols)

# colour intensity based on sign
#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")
table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
head(str_df_plot_cols)

# colour based on effect
table(str_df_plot_cols$pe_effect_outcome)

# colors = c("#ffd700" #gold
#            , "#f0e68c" #khaki
#            , "#da70d6"# orchid
#            , "#ff1493"# deeppink
#            , "#a0522d" #sienna
#            , "#d2b48c" # tan
#            , "#00BFC4" #, "#007d85" #blue
#            , "#F8766D" )# red

pe_colour_map = c("DD_lig"            = "#ffd700" # gold
                  , "SS_lig"          = "#f0e68c" # khaki
                  , "DD_nucleic_acid" = "#a0522d" # sienna
                  , "SS_nucleic_acid" = "#d2b48c" # tan
                  , "DD_ppi2"         = "#da70d6" # orchid
                  , "SS_ppi2"         = "#ff1493" # deeppink
                  , "DD_stability"    = "#f8766d" # red
                  , "SS_stability"    = "#00BFC4") # blue

# map the colours: named-vector lookup, then fail loudly on unknown keys
str_df_plot_cols$colour_map = unname(pe_colour_map[str_df_plot_cols$pe_effect_outcome])
if (anyNA(str_df_plot_cols$colour_map)){
  stop("Abort: no colour defined for effect outcome(s): "
       , toString(unique(str_df_plot_cols$pe_effect_outcome[is.na(str_df_plot_cols$colour_map)])))
}

head(str_df_plot_cols$colour_map)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)

# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
#                                   , "_"
#                                   , str_df_plot_cols$colour_map)
# head(str_df_plot_cols$colours)
# table(str_df_plot_cols$colours)
#
#
# class(str_df_plot_cols$colour_map)
# str(str_df_plot_cols)

# sort by colour
head(str_df_plot_cols)
str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
head(str_df_plot_cols)

#======================================
# write file with prominent effects
#======================================
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")

write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))

################################
# printing for chimera
###############################
chain_suffix = ".A"
str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)

# Return the rows of `df` whose pe_effect_outcome equals `outcome` and print
# their positions (with `suffix` appended) as a comma separated string to
# paste into a Chimera command. A category with no rows is reported and an
# empty data frame is returned; a missing category is not an error, since
# not every gene has every effect group.
chimera_positions = function(df, outcome, suffix = chain_suffix){
  hits = df[df$pe_effect_outcome == outcome, , drop = FALSE]
  if (nrow(hits) == 0){
    cat("\nNo positions with prominent effect:", outcome, "\n")
  }else{
    print(toString(paste0(hits[[pos_colname]], suffix)))
  }
  hits
}

#===================================================
#-------------------
# Ligand Affinity
#-------------------
# -ve Lig Aff
dd_lig     = chimera_positions(str_df_plot_cols, "DD_lig")
dd_lig_pos = dd_lig[[pos_colname]]

# +ve Lig Aff
ss_lig     = chimera_positions(str_df_plot_cols, "SS_lig")
ss_lig_pos = ss_lig[[pos_colname]]

#===================================================
#-------------------
# PPI2 Affinity
#-------------------
# -ve PPI2
dd_ppi2     = chimera_positions(str_df_plot_cols, "DD_ppi2")
dd_ppi2_pos = dd_ppi2[[pos_colname]]

# +ve PPI2
ss_ppi2     = chimera_positions(str_df_plot_cols, "SS_ppi2")
ss_ppi2_pos = ss_ppi2[[pos_colname]]

#===================================================
#-------------------
# Nucleic Acid Affinity
#-------------------
# -ve NA
dd_na     = chimera_positions(str_df_plot_cols, "DD_nucleic_acid")
dd_na_pos = dd_na[[pos_colname]]

# +ve NA
ss_na     = chimera_positions(str_df_plot_cols, "SS_nucleic_acid")
ss_na_pos = ss_na[[pos_colname]]

#=========================================================
#------------------------
# Stability
#------------------------
# -ve Stability
dd_stability     = chimera_positions(str_df_plot_cols, "DD_stability")
dd_stability_pos = dd_stability[[pos_colname]]

# +ve Stability
ss_stability     = chimera_positions(str_df_plot_cols, "SS_stability")
ss_stability_pos = ss_stability[[pos_colname]]
####################################################################