#!/usr/bin/env Rscript #source("~/git/LSHTM_analysis/config/alr.R") source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/rpob.R") # get plotting dfs source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") length(all_stability_cols); length(raw_stability_cols) length(scaled_stability_cols); length(outcome_stability_cols) length(affinity_dist_colnames) static_cols = c("mutationinformation", "position", "sensitivity") other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames) #omit avg cols and foldx_scaled_signC cols other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)] other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )] other_cols cols_to_extract = c(static_cols, other_cols) expected_ncols = length(static_cols) + length(other_cols) str_df = merged_df3[, cols_to_extract] if (ncol(str_df) == expected_ncols){ cat("\nPASS: successfully extracted cols for calculating prominent effects") }else{ stop("\nAbort: Could not extract cols for calculating prominent effects") } #========================= # Masking affinity columns #========================= # First make values for affinity cols 0 when their corresponding dist >10 head(str_df) # replace in place affinity values >10 str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 #ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 if (tolower(gene)%in%geneL_ppi2){ str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 } # na gene: replace in place na affinity values where na dist >10 if (tolower(gene)%in%geneL_na){ str_df[str_df["XXXX"]>10,"mcsm_na_scaled"]=0 } colnames(str_df) head(str_df) # get names of cols to calculate the prominent effects from scaled_cols_tc = c("duet_scaled", "deepddg_scaled" , "ddg_dynamut2_scaled", "foldx_scaled","affinity_scaled" , "mmcsm_lig_scaled" , "mcsm_ppi2_scaled") #-------------------------------- #get rowmax for absolute values #-------------------------------- #str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))}) #str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))}) #correct #BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO) #also corr but weird #POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO) ################################################ # #------------- # # short df: try # #------------- # df2_short = str_df[str_df$position%in%c(167, 423, 427),] # df2_short = str_df[str_df$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),] # df2_short = str_df[str_df$position%in%c(435, 480),] # # # give_col=function(x,y,df=df2_short){ # df[df$position==x,y] # } # # for (i in unique(df2_short$position) ){ # print(i) # #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) ) # #cat(length(unique(df2$position))) # #df2_short[df2_short$position==i,scaled_cols_tc] # # biggest = max(abs(give_col(i,scaled_cols_tc))) # # df2_short[df2_short$position==i,'abs_max_effect'] = biggest # df2_short[df2_short$position==i,'effect_type']= names( # give_col(i,scaled_cols_tc)[which( # abs( # give_col(i,scaled_cols_tc) # ) == biggest, arr.ind=T # )[, "col"]]) # # effect_name = df2_short[df2_short$position==i,'effect_type'][1] # pick first one in case we have multiple exact values # # # get index/rowname for value of max effect, and then use it to get the original sign # # here # #df2_short[df2_short$position==i,c(effect_name)] # #which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T) # ind = rownames(which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) # df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,]) # } # # # ends with suffix 2 if dups # df2_short$effect_type = sub("\\.[0-9]+", "", df2_short$effect_type) # cull duplicate effect types that happen when there are exact duplicate values # # View(df2_short) #=============== # whole df #=============== give_col=function(x,y,df=str_df){ df[df$position==x,y] } for (i in unique(str_df$position) ){ print(i) #cat(length(unique(str_df$position))) biggest = max(abs(give_col(i,scaled_cols_tc))) str_df[str_df$position==i,'abs_max_effect'] = biggest str_df[str_df$position==i,'effect_type']= names( give_col(i,scaled_cols_tc)[which( abs( give_col(i,scaled_cols_tc) ) == biggest, arr.ind=T )[, "col"]])[1] effect_name = unique(str_df[str_df$position==i,'effect_type'])#[1] # pick first one in case we have multiple exact values # get index/rowname for value of max effect, and then use it to get the original sign # here ind = rownames(which(abs(str_df[str_df$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) str_df[str_df$position==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] } # ends with suffix 2 if dups str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values colnames(str_df) # check str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),] table(str_df$effect_type) #================ # for Plots #================ str_df_short = str_df[, c("mutationinformation","position","sensitivity" , "effect_type" , "effect_sign")] table(str_df_short$effect_type) table(str_df_short$effect_sign) str(str_df_short) # assign pe outcome str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") table(str_df_short$pe_outcome ) table(str_df_short$effect_sign) #============== # group effect type: # lig, ppi2, nuc. acid, stability #============== affcols = c("affinity_scaled", "mmcsm_lig_scaled") ppi2_cols = c("mcsm_ppi2_scaled") #nuc_na_cols = c("mcsm_a_scaled") #lig table(str_df_short$effect_type) str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols , "lig" , str_df_short$effect_type) table(str_df_short$effect_grouped) #ppi2 str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols , "ppi2" , str_df_short$effect_grouped) table(str_df_short$effect_grouped) #stability str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2") , "stability" , str_df_short$effect_grouped) table(str_df_short$effect_grouped) # create a sign as well str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" , str_df_short$effect_grouped) table(str_df_short$pe_effect_outcome) ##################################################################### # Chimera: for colouring #################################################################### #------------------------------------- # get df with unique position #-------------------------------------- #data[!duplicated(data$x), ] str_df_plot = str_df[!duplicated(str_df$position),] if (nrow(str_df_plot) == length(unique(str_df$position))){ cat("\nPASS: successfully extracted df with unique positions") }else{ stop("\nAbort: Could not extract df with unique positions") } #------------------------------------- # generate colours for effect types #-------------------------------------- str_df_plot_cols = str_df_plot[, c("position", "sensitivity" , affinity_dist_colnames , "abs_max_effect" , "effect_type" , "effect_sign")] head(str_df_plot_cols) # colour intensity based on sign str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") table(str_df_plot_cols$colour_hue) head(str_df_plot_cols) # colour based on effect table(str_df_plot_cols$effect_type) pe_colour_map = c("affinity_scaled" = "salmon" , "mmcsm_lig_scaled" = "salmon" , "mcsm_ppi2_scaled" = "pink" , "mcsm_na_scaled" = "orange" , "duet_scaled" = "dimgray" , "deepddg_scaled" = "dimgray" , "ddg_dynamut2_scaled"= "dimgray" , "foldx_scaled" = "dimgray") #unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) #map the colours str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$effect_type ,function(x){pe_colour_map[[x]]} )) str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue , "_" , str_df_plot_cols$colour_map) head(str_df_plot_cols$colours) table(str_df_plot_cols$colours) class(str_df_plot_cols$colour_map) str(str_df_plot_cols) # sort by colour head(str_df_plot_cols) str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] head(str_df_plot_cols) #====================================== # write file with prominent effects #====================================== outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) ################################ # printing for chimera ############################### str_df_plot_cols$pos_chain = paste0(str_df_plot_cols$position, ".B,") table(str_df_plot_cols$colour_map) #=================================================== #------------------- # Ligand Affinity #------------------- foo = str_df_plot_cols[str_df_plot_cols$colours=="yellow",] all(foo2$effect_sign == 1) foo1 = str_df_plot_cols[str_df_plot_cols$colours=="bright_salmon",] all(foo3$effect_sign == -1) #light salmon: stabilising affinity table(str_df_plot_cols$colours) affinity_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_salmon"] affinity_pos_lc = paste(affinity_pos_l, collapse = "") affinity_pos_lc table(str_df_plot_cols$colours)[["light_salmon"]] #bright salmon: DEstabilsing affinity affinity_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_salmon"] affinity_pos_bc = paste(affinity_pos_b, collapse = "") affinity_pos_bc table(str_df_plot_cols$colours)[["bright_salmon"]] c1 = length(affinity_pos_l) + length(affinity_pos_b) == table(str_df_plot_cols$colour_map)[["salmon"]] if (c1){ cat("PASS: affinity colour numbers checked") }else{ stop("Abort: affinity colour numbers mismtatch") } #put in chimera cmd affinity_pos_lc affinity_pos_bc #=================================================== #------------------- # ppi2 Affinity #------------------- foo2 = str_df_plot_cols[str_df_plot_cols$colours=="light_pink",] all(foo2$effect_sign == 1) foo3 = str_df_plot_cols[str_df_plot_cols$colours=="bright_pink",] all(foo3$effect_sign == -1) #light_pink: stabilising affinity table(str_df_plot_cols$colours) ppi2_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_pink"] ppi2_pos_lc = paste(ppi2_pos_l, collapse = "") ppi2_pos_lc table(str_df_plot_cols$colours)[["light_pink"]] #bright pink: DEstabilsing affinity ppi2_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_pink"] ppi2_pos_bc = paste(ppi2_pos_b, collapse = "") ppi2_pos_bc table(str_df_plot_cols$colours)[["bright_pink"]] c2 = length(ppi2_pos_l) + length(ppi2_pos_b) == table(str_df_plot_cols$colour_map)[["pink"]] if (c2){ cat("PASS: ppi2 colour numbers checked") }else{ stop("Abort: ppi2 colour numbers mismtatch") } #put in chimera cmd ppi2_pos_lc ppi2_pos_bc #========================================================= #------------------- # Stability #------------------- foo4 = str_df_plot_cols[str_df_plot_cols$colours=="light_dimgray",] all(foo2$effect_sign == 1) foo5 = str_df_plot_cols[str_df_plot_cols$colours=="bright_dimgray",] all(foo3$effect_sign == -1) #light_dimgray: stabilising Stability table(str_df_plot_cols$colours) stab_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_dimgray"] stab_pos_lc = paste(stab_pos_l, collapse = "") stab_pos_lc table(str_df_plot_cols$colours)[["light_dimgray"]] #bright_dimgray pink: DEstabilsing Stability stab_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_dimgray"] stab_pos_bc = paste(stab_pos_b, collapse = "") stab_pos_bc table(str_df_plot_cols$colours)[["bright_dimgray"]] c3 = length(stab_pos_l) + length(stab_pos_b) == table(str_df_plot_cols$colour_map)[["dimgray"]] if (c3){ cat("PASS: stability colour numbers checked") }else{ stop("Abort: stability colour numbers mismtatch") } #put in chimera cmd stab_pos_lc stab_pos_bc # stab tool count table(str_df_plot_cols$effect_type) table(str_df_plot_cols$effect_type, str_df_plot_cols$effect_sign)