#!/usr/bin/env Rscript
#########################################################
# TASK: Script to format data for corr plots
#
# Reads (must already exist in the calling environment):
#   merged_df2, merged_df3, merged_df3_comp, drug
#   plus `%>%`, rename() and any_of() (dplyr) on the search path
# Creates:
#   big_df_colnames, corr_cols_select, corr_pretty_names,
#   corr_df_m2, corr_df_m2_f, corr_df_m3, corr_df_m3_f
#########################################################

#=================================================
# Data for Corrplots
#=================================================
cat("\n=========================================="
    , "\nCORR PLOTS data: ALL params"
    , "\n=========================================")

# Input data: merged_df2

#----------------------------
# columns for corr plots:PS
#----------------------------
# NOTE: you can add mcsm_ppi column as well, and it will only select what
# it can find! Columns listed here but absent from merged_df2 are skipped.
big_df_colnames <- data.frame(names(merged_df2))

# `drug` is a variable defined outside this script; its value is used as a
# column name here.
corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
                      , "duet_stability_change", "ligand_affinity_change"
                      , "ddg_foldx", "asa", "rsa"
                      , "rd_values", "kd_values", "log10_or_mychisq"
                      , "neglog_pval_fisher", "af"
                      , "deepddg", "ddg_dynamut", "ddg_dynamut2"
                      , "mcsm_na_affinity"
                      , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm"
                      , "ddg_duet", "ligand_distance")

#----------------------------
# Pretty colnames (new name = existing column name)
#----------------------------
# Defined once and shared by both *_f data frames so the two stay in sync.
corr_pretty_names <- c(
  DUET          = "duet_stability_change"
  , "mCSM-lig"  = "ligand_affinity_change"
  , FoldX       = "ddg_foldx"
  , DeepDDG     = "deepddg"
  , ASA         = "asa"
  , RSA         = "rsa"
  , KD          = "kd_values"
  , RD          = "rd_values"
  , MAF         = "af"
  , "Log (OR)"  = "log10_or_mychisq"
  , "-Log (P)"  = "neglog_pval_fisher"
  , Dynamut     = "ddg_dynamut"
  , "ENCoM-DDG" = "ddg_encom"
  , mCSM        = "ddg_mcsm"
  , SDM         = "ddg_sdm"
  , "DUET-d"    = "ddg_duet"
  , "ENCoM-DDS" = "dds_encom"
  , Dynamut2    = "ddg_dynamut2"
  , "mCSM-NA"   = "mcsm_na_affinity"
)

#===========================
# Corr data for plots: PS
# big_df ps: ~ merged_df2
#===========================
# drop = FALSE keeps a data frame even if only one requested column exists.
corr_df_m2 <- merged_df2[, colnames(merged_df2) %in% corr_cols_select
                         , drop = FALSE]

#-----------------------
# formatting: some cols
# Add pretty colnames
#-----------------------
# any_of() renames only the columns that are present, so an optional column
# missing from merged_df2 no longer makes rename() fail.
corr_df_m2_f <- corr_df_m2 %>%
  rename(any_of(corr_pretty_names))

#===========================
# Corr data for plots: PS
# short_df ps: ~merged_df3
#===========================
# One row per mutationinformation value (first occurrence is kept).
corr_df_m3 <- corr_df_m2[!duplicated(corr_df_m2$mutationinformation)
                         , , drop = FALSE]

# Sanity check: row count is compared with merged_df3, and the number of
# non-NA log10_or_mychisq values is compared with nrow(merged_df3_comp).
# A mismatch is reported but does not stop the script.
na_or <- sum(is.na(corr_df_m3$log10_or_mychisq))
check1 <- nrow(corr_df_m3) - na_or
check1

if (nrow(corr_df_m3) == nrow(merged_df3) &&
    nrow(merged_df3_comp) == check1) {
  cat( "\nPASS: No. of rows for corr_df_m3 match"
       , "\nPASS: No. of OR values checked: " , check1)
} else {
  cat("\nFAIL: Numbers mismatch:"
      , "\nExpected nrows: ", nrow(merged_df3)
      , "\nGot: ", nrow(corr_df_m3)
      , "\nExpected OR values: ", nrow(merged_df3_comp)
      , "\nGot: ", check1)
}

#-----------------------
# formatting: some cols
# Add pretty colnames
#-----------------------
corr_df_m3_f <- corr_df_m3 %>%
  rename(any_of(corr_pretty_names))

########################################################################
# Summary of the data frames created above.
cat("\nCorr Data created:"
    , "\n==================================="
    , "\ncorr_df_m2: created from merged_df2"
    , "\n==================================="
    , "\nnrows:", nrow(corr_df_m2)
    , "\nncols:", ncol(corr_df_m2)
    , "\n==================================="
    , "\ncorr_df_m3: created from merged_df3"
    , "\n==================================="
    , "\nnrows:", nrow(corr_df_m3)
    , "\nncols:", ncol(corr_df_m3)
)