From b614962e4500f2eed6499ffadeacb903d88fb752 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 28 Jun 2021 17:25:45 +0100 Subject: [PATCH] added corr data to get_plotting_dfs.R and generate corr plots --- scripts/plotting/corr_PS_LIG.R | 4 +- scripts/plotting/corr_data.R | 22 +- scripts/plotting/get_plotting_dfs.R | 217 +++++++++++++++++- scripts/plotting/running_plotting_scripts.txt | 28 ++- 4 files changed, 252 insertions(+), 19 deletions(-) diff --git a/scripts/plotting/corr_PS_LIG.R b/scripts/plotting/corr_PS_LIG.R index 6b1a708..54db489 100644 --- a/scripts/plotting/corr_PS_LIG.R +++ b/scripts/plotting/corr_PS_LIG.R @@ -85,9 +85,9 @@ df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) df_ps$log10_or_kin = log10(df_ps$or_kin) df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) -#=========================== +#=============================== # Data for Correlation plots:PS -#=========================== +#=============================== # subset data to generate pairwise correlations cols_to_select = c("duet_scaled" diff --git a/scripts/plotting/corr_data.R b/scripts/plotting/corr_data.R index 4d6847c..aabc62e 100644 --- a/scripts/plotting/corr_data.R +++ b/scripts/plotting/corr_data.R @@ -61,12 +61,6 @@ all_plot_dfs = combining_dfs_plotting(my_df_u , lig_dist_colname = 'ligand_distance' , lig_dist_cutoff = 10) - - - - - - cat(paste0("Directories imported:" , "\ndatadir:", datadir , "\nindir:", indir @@ -110,7 +104,6 @@ df_lig = merged_df2_lig #====================== # adding log cols #====================== - df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) @@ -119,9 +112,9 @@ df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) #df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0) -#=========================== +#=============================== # Data for Correlation plots:PS -#=========================== +#=============================== # subset data to generate pairwise 
correlations cols_to_select = c("mutationinformation" , "duet_scaled" @@ -136,7 +129,7 @@ cols_to_select = c("mutationinformation" , "or_kin" , "neglog_pwald_kin" , "af" - , "af_kin" + #, "af_kin" , "duet_outcome" , drug) @@ -176,9 +169,9 @@ offset = 1 corr_ps_df2 = corr_data_ps[start:end] head(corr_ps_df2) -#----------------- +#-------------------------- # short_df ps: merged_df3 -#----------------- +#-------------------------- corr_ps_df3 = corr_ps_df2[!duplicated(corr_ps_df2$Mutation),] na_or = sum(is.na(corr_ps_df3$`Log (OR)`)) @@ -194,9 +187,9 @@ check2 = nrow(corr_ps_df3) - na_adj_or #} ################################################################################################ -#=========================== +#================================= # Data for Correlation plots: LIG -#=========================== +#================================= table(df_lig$ligand_outcome) df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) @@ -258,7 +251,6 @@ offset = 1 corr_lig_df2 = corr_data_lig[start:end] head(corr_lig_df2) - #----------------- # short_df lig: merged_df3_lig #----------------- diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 537b84c..7337c89 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -231,6 +231,221 @@ str(wide_df_or_mult) position_or_mult = as.numeric(colnames(wide_df_or_mult)) +#################################################################### +# Data for Corrplots +#################################################################### +cat("\n==========================================" + , "\nCORR PLOTS data: PS" + , "\n===========================================") + +df_ps = merged_df2 + +#-------------------- +# adding log cols +#-------------------- +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +##df_ps$log10_or_kin = log10(df_ps$or_kin) +##df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + 
+#df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0) + +#---------------------------- +# columns for corr plots:PS +#---------------------------- +# subset data to generate pairwise correlations +cols_to_select = c("mutationinformation" + , "duet_scaled" + , "foldx_scaled" + #, "mutation_info_labels" + , "asa" + , "rsa" + , "rd_values" + , "kd_values" + , "log10_or_mychisq" + , "neglog_pval_fisher" + ##, "or_kin" + ##, "neglog_pwald_kin" + , "af" + ##, "af_kin" + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[cols_to_select] + +dim(corr_data_ps) + +#-------------------------------------- +# assign nice colnames (for display) +#-------------------------------------- +my_corr_colnames = c("Mutation" + , "DUET" + , "Foldx" + #, "Mutation class" + , "ASA" + , "RSA" + , "RD" + , "KD" + , "Log (OR)" + , "-Log (P)" + ##, "Adjusted (OR)" + ##, "-Log (P wald)" + , "MAF" + ##, "AF_kin" + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +start = 1 +end = which(colnames(corr_data_ps) == drug); end # should be the last column +offset = 1 + +#=========================== +# Corr data for plots: PS +# big_df ps: ~ merged_df2 +#=========================== + +#corr_ps_df2 = corr_data_ps[start:(end-offset)] # without drug +corr_ps_df2 = corr_data_ps[start:end] +head(corr_ps_df2) + +#=========================== +# Corr data for plots: PS +# short_df ps: ~merged_df3 +#=========================== +corr_ps_df3 = corr_ps_df2[!duplicated(corr_ps_df2$Mutation),] + +na_or = sum(is.na(corr_ps_df3$`Log (OR)`)) +check1 = nrow(corr_ps_df3) - na_or + +##na_adj_or = sum(is.na(corr_ps_df3$`adjusted (OR)`)) +##check2 = nrow(corr_ps_df3) - na_adj_or + +if (nrow(corr_ps_df3) == nrow(merged_df3) && nrow(merged_df3_comp) == check1) { + cat( "\nPASS: No. of rows for corr_ps_df3 match" + , "\nPASS: No. 
of OR values checked: " , check1) +} else { + cat("\nFAIL: Numbers mismatch:" + , "\nExpected nrows: ", nrow(merged_df3) + , "\nGot: ", nrow(corr_ps_df3) + , "\nExpected OR values: ", nrow(merged_df3_comp) + , "\nGot: ", check1) +} + +#================================= +# Data for Correlation plots: LIG +#================================= +cat("\n==========================================" + , "\nCORR PLOTS data: LIG" + , "\n===========================================") + +df_lig = merged_df2_lig + +table(df_lig$ligand_outcome) + +#-------------------- +# adding log cols +#-------------------- +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + +##df_lig$log10_or_kin = log10(df_lig$or_kin) +##df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + +#---------------------------- +# columns for corr plots:LIG +#---------------------------- +# subset data to generate pairwise correlations +cols_to_select = c("mutationinformation" + , "affinity_scaled" + #, "mutation_info_labels" + , "asa" + , "rsa" + , "rd_values" + , "kd_values" + , "log10_or_mychisq" + , "neglog_pval_fisher" + ##, "or_kin" + ##, "neglog_pwald_kin" + , "af" + ##, "af_kin" + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select] + +dim(corr_data_lig) + +#-------------------------------------- +# assign nice colnames (for display) +#-------------------------------------- +my_corr_colnames = c("Mutation" + , "Ligand Affinity" + #, "Mutation class" + , "ASA" + , "RSA" + , "RD" + , "KD" + , "Log (OR)" + , "-Log (P)" + ##, "Adjusted (OR)" + ##, "-Log (P wald)" + , "MAF" + ##, "MAF_kin" + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames +colnames(corr_data_lig) + +start = 1 +end = which(colnames(corr_data_lig) == drug); end # should be the last column +offset = 1 + +#============================= +# Corr data for plots: LIG +# big_df lig: ~ merged_df2_lig 
+#============================== +#corr_lig_df2 = corr_data_lig[start:(end-offset)] # without drug +corr_lig_df2 = corr_data_lig[start:end] +head(corr_lig_df2) + +#============================= +# Corr data for plots: LIG +# short_df lig: ~ merged_df3_lig +#============================== +corr_lig_df3 = corr_lig_df2[!duplicated(corr_lig_df2$Mutation),] + +na_or_lig = sum(is.na(corr_lig_df3$`Log (OR)`)) +check1_lig = nrow(corr_lig_df3) - na_or_lig + +if (nrow(corr_lig_df3) == nrow(merged_df3_lig) && nrow(merged_df3_comp_lig) == check1_lig) { + cat( "\nPASS: No. of rows for corr_lig_df3 match" + , "\nPASS: No. of OR values checked: " , check1_lig) +} else { + cat("\nFAIL: Numbers mismatch:" + , "\nExpected nrows: ", nrow(merged_df3_lig) + , "\nGot: ", nrow(corr_lig_df3) + , "\nExpected OR values: ", nrow(merged_df3_comp_lig) + , "\nGot: ", check1_lig) +} + +# sanity check, then remove intermediate objects +identical(corr_data_lig, corr_lig_df2) +identical(corr_data_ps, corr_ps_df2) + +rm(df_ps, df_lig, corr_data_ps, corr_data_lig) + ######################################################################## # End of script -######################################################################## \ No newline at end of file +######################################################################## +rm(foo) diff --git a/scripts/plotting/running_plotting_scripts.txt b/scripts/plotting/running_plotting_scripts.txt index 9995b9b..74c62ad 100644 --- a/scripts/plotting/running_plotting_scripts.txt +++ b/scripts/plotting/running_plotting_scripts.txt @@ -37,7 +37,7 @@ the df needed to plot graphs. This is run by these ind plottings scripts like below as cmd where the cmd args return valid dfs used for plots. 
#=================== -# log_plots.R +# logo_plots.R #=================== #----------------------------------------------------------------------- ./logo_plots.R -d streptomycin -g gid @@ -62,6 +62,32 @@ sources: - fa flag has default if not supplied - fb flag has default if not supplied - Error in grid.Call fixed by commenting out image rendering on console + +#=================== +# corr_plots.R +#=================== +#----------------------------------------------------------------------- +./corr_plots.R -d streptomycin -g gid +#----------------------------------------------------------------------- + +It replaces + ## corr_data.R + ## corr_PS_LIG.R +These have been moved to redundant/ + +sources: + ## get_plotting_dfs.R + + outputs: 4 svgs in the plotdir + ## corr_PS.svg + ## corr_PS_all.svg + ## corr_LIG.svg + ## corr_LIG_all.svg + +note: + - fa flag has default if not supplied + - fb flag has default if not supplied + - Short df is used here, i.e. derivations from _df3 ######################################################################## # TODO Delete: dirs.R