#!/usr/bin/env Rscript

# ---------------------------------------------------------------------------
# Gene configuration: exactly one config script is sourced; the others are
# kept here (commented out) so the target gene can be switched quickly.
# ---------------------------------------------------------------------------
# source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")

# Get the plotting data frames.
# NOTE(review): `gene`, `merged_df3` (used further down) are not defined in
# this script; presumably they come from the sourced scripts -- confirm.
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

###################################################################
# Distance columns and cut-off
###################################################################
# FIXME: add the nucleic-acid (NA) distance column once SP replies
# DONE: plotting_globals.R
dist_columns <- c("ligand_distance", "interface_dist")
DistCutOff <- 10

# Columns shared by every column group defined below
common_cols <- c(
  "mutationinformation",
  "X5uhc_position",
  "X5uhc_offset",
  "position",
  "dst_mode",
  "mutation_info_labels",
  "sensitivity",
  dist_columns
)

# ---- stability cols -------------------------------------------------------
raw_cols_stability <- c(
  "duet_stability_change",
  "deepddg",
  "ddg_dynamut2",
  "ddg_foldx"
)

scaled_cols_stability <- c(
  "duet_scaled",
  "deepddg_scaled",
  "ddg_dynamut2_scaled",
  "foldx_scaled"
)

outcome_cols_stability <- c(
  "duet_outcome",
  "deepddg_outcome",
  "ddg_dynamut2_outcome",
  "foldx_outcome"
)

# ---- affinity cols --------------------------------------------------------
raw_cols_affinity <- c(
  "ligand_affinity_change",
  "mmcsm_lig",
  "mcsm_ppi2_affinity",
  "mcsm_na_affinity"
)

scaled_cols_affinity <- c(
  "affinity_scaled",
  "mmcsm_lig_scaled",
  "mcsm_ppi2_scaled",
  "mcsm_na_scaled"
)

outcome_cols_affinity <- c(
  "ligand_outcome",
  "mmcsm_lig_outcome",
  "mcsm_ppi2_outcome",
  "mcsm_na_outcome"
)

# ---- conservation cols ----------------------------------------------------
raw_cols_conservation <- c(
  "consurf_score",
  "snap2_score",
  "provean_score"
)

scaled_cols_conservation <- c(
  "consurf_scaled",
  "snap2_scaled",
  "provean_scaled"
)

# CANNOT strictly be used: the categories are not identical across tools and
# a ConSurf outcome column is missing altogether.
outcome_cols_conservation <- c(
  "provean_outcome",
  "snap2_outcome",
  "consurf_colour_rev"
  # , "consurf_outcome"  # doesn't exist, use this mapping
)

# Every column name defined above, in group order
all_cols <- c(
  common_cols,
  raw_cols_stability,
  scaled_cols_stability,
  outcome_cols_stability,
  raw_cols_affinity,
  scaled_cols_affinity,
  outcome_cols_affinity,
  raw_cols_conservation,
  scaled_cols_conservation,
  outcome_cols_conservation
)

# ---- output ---------------------------------------------------------------
# Per-gene image directory (gene name lower-cased, trailing slash included)
outdir_images <- paste0(
  "~/git/Writing/thesis/images/results/",
  tolower(gene),
  "/"
)

####################################
# merged_df3: NECESSARY pre-processing
####################################
df3 <- merged_df3

# ---- PREFORMATTING: for consistency ----
# DONE: combining_dfs.R
# df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S")
# table(df3$sensitivity)

# ConSurf labels (creation of the column is disabled here)
# consurf_colOld = "consurf_colour_rev"
# consurf_colNew = "consurf_outcome"
# df3[[consurf_colNew]] = df3[[consurf_colOld]]
# df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
# df3[[consurf_colNew]]

# ...but not this bit: relabel the levels of consurf_outcome.
# The labels are character strings: "nsd" followed by "1" to "9".
levels(df3$consurf_outcome) <- c("nsd", as.character(1:9))
# levels(df3$consurf_outcome)

# SNAP2 labels
# snap2_colname = "snap2_outcome"
# df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect")
# df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral")

# For reference only: not needed per se, as the plotting function already
# derives these and assigns labels for the barplots.
# labels_duet = levels(as.factor(df3$duet_outcome))
# labels_foldx = levels(as.factor(df3$foldx_outcome))
# labels_deepddg = levels(as.factor(df3$deepddg_outcome))
# labels_ddg_dynamut2_outcome = levels(as.factor(df3$ddg_dynamut2_outcome))
#
# labels_lig = levels(as.factor(df3_lig$ligand_outcome))
# labels_mmlig = levels(as.factor(df3_lig$mmcsm_lig_outcome))
# labels_ppi2 = levels(as.factor(df3_ppi2$mcsm_ppi2_outcome))
#
# labels_provean = levels(as.factor(df3$provean_outcome))
# labels_snap2 = levels(as.factor(df3$snap2_outcome))
# labels_consurf = levels(as.factor(df3$consurf_colour_rev))
# df3$consurf_colour_rev = as.factor(df3$consurf_colour_rev)
##############################################################################
#######################################
# merged_df2: NECESSARY pre-processing
#######################################
# NOTE(review): `merged_df2`, `drug`, `raw_cols_stability` and
# `scaled_cols_stability` are not defined in this section; they must already
# exist in the session when this code runs.
df2 <- merged_df2

# ---- PREFORMATTING: for consistency ----
# DONE: combining_dfs.R
# df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
# table(df2$sensitivity)

#----------------------------------------------------
# Create dst2: fill na in dst with value of dst_mode
# for epistasis
#----------------------------------------------------
# DONE: combining_dfs.R
# df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2f$dst)

#----------------------------------------------------
# Reverse the sign of the FoldX raw and scaled values
# to allow averaging with the other tools.
#
# Unary minus is used rather than abs(): abs() would discard the sign
# (every value becomes non-negative) instead of reversing it, so the
# direction of the effect would be lost before averaging.
# NOTE(review): assumes FoldX uses the opposite sign convention to the other
# stability tools -- confirm against the tool documentation.
#----------------------------------------------------
head(df2["ddg_foldx"])
df2[["ddg_foldxC"]] <- -df2[["ddg_foldx"]]
head(df2["ddg_foldxC"])

head(df2["foldx_scaled"])
df2[["foldx_scaled_signC"]] <- -df2[["foldx_scaled"]]
head(df2["foldx_scaled_signC"])

# Swap the original FoldX columns for their sign-corrected counterparts in
# the raw and scaled stability column lists.
rm_foldx_cols <- c("ddg_foldx", "foldx_scaled")

raw_cols_stab_revised <- c(
  raw_cols_stability[!raw_cols_stability %in% rm_foldx_cols],
  "ddg_foldxC"
)

scaled_cols_stab_revised <- c(
  scaled_cols_stability[!scaled_cols_stability %in% rm_foldx_cols],
  "foldx_scaled_signC"
)

######################################################
# Affinity related variables
# DONE: in plotting_globals.R
# DistCutOff = 10
# LigDist_colname   # = "ligand_distance" # from globals
# ppi2Dist_colname = "interface_dist"
# naDist_colname = "TBC"
######################################################

# ---- corr colnames --------------------------------------------------------
# Column names listed for reference:
#   drug, "dst_mode", "ligand_distance", "DUET", "mCSM-lig", "FoldX",
#   "DeepDDG", "ASA", "RSA", "KD", "RD", "Consurf", "SNAP2", "MAF",
#   "Log (OR)", "-Log (P)", "Dynamut2", "mCSM-PPI2", "interface_dist"

# Stability correlation columns
corr_ps_colnames <- c(
  "DUET",
  "FoldX",
  "DeepDDG",
  "Dynamut2",
  "MAF",
  "Log (OR)",
  "-Log (P)",
  # "ASA",
  # "RSA",
  # "KD",
  # "RD",
  # "Consurf",
  # "SNAP2",
  # "mCSM-lig",
  # "ligand_distance",
  # "mCSM-PPI2",
  # "interface_dist",
  "dst_mode",
  drug
)

# Ligand-affinity correlation columns
corr_lig_colnames <- c(
  "mCSM-lig",
  "MAF",
  "Log (OR)",
  "-Log (P)",
  "ligand_distance",
  "dst_mode",
  drug
)

# Protein-protein interface correlation columns
corr_ppi2_colnames <- c(
  "mCSM-PPI2",
  "SNAP2",
  "Log (OR)",
  "-Log (P)",
  "interface_dist",
  "dst_mode",
  drug
)

# Conservation correlation columns
# FIXME: Add provean
corr_conservation_cols <- c(
  "Consurf",
  "SNAP2",
  "MAF",
  "Log (OR)",
  "-Log (P)",
  "dst_mode",
  drug
)