added corr data to get_plotting_dfs.R and generate corr plots

This commit is contained in:
Tanushree Tunstall 2021-06-28 17:25:45 +01:00
parent 2993ab722a
commit a7d26412e5
4 changed files with 252 additions and 19 deletions

View file

@ -85,9 +85,9 @@ df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
df_ps$log10_or_kin = log10(df_ps$or_kin) df_ps$log10_or_kin = log10(df_ps$or_kin)
df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
#=========================== #===============================
# Data for Correlation plots:PS # Data for Correlation plots:PS
#=========================== #===============================
# subset data to generate pairwise correlations # subset data to generate pairwise correlations
cols_to_select = c("duet_scaled" cols_to_select = c("duet_scaled"

View file

@ -61,12 +61,6 @@ all_plot_dfs = combining_dfs_plotting(my_df_u
, lig_dist_colname = 'ligand_distance' , lig_dist_colname = 'ligand_distance'
, lig_dist_cutoff = 10) , lig_dist_cutoff = 10)
cat(paste0("Directories imported:" cat(paste0("Directories imported:"
, "\ndatadir:", datadir , "\ndatadir:", datadir
, "\nindir:", indir , "\nindir:", indir
@ -110,7 +104,6 @@ df_lig = merged_df2_lig
#====================== #======================
# adding log cols # adding log cols
#====================== #======================
df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
@ -119,9 +112,9 @@ df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
#df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0) #df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0)
#=========================== #===============================
# Data for Correlation plots:PS # Data for Correlation plots:PS
#=========================== #===============================
# subset data to generate pairwise correlations # subset data to generate pairwise correlations
cols_to_select = c("mutationinformation" cols_to_select = c("mutationinformation"
, "duet_scaled" , "duet_scaled"
@ -136,7 +129,7 @@ cols_to_select = c("mutationinformation"
, "or_kin" , "or_kin"
, "neglog_pwald_kin" , "neglog_pwald_kin"
, "af" , "af"
, "af_kin" #, "af_kin"
, "duet_outcome" , "duet_outcome"
, drug) , drug)
@ -176,9 +169,9 @@ offset = 1
corr_ps_df2 = corr_data_ps[start:end] corr_ps_df2 = corr_data_ps[start:end]
head(corr_ps_df2) head(corr_ps_df2)
#----------------- #--------------------------
# short_df ps: merged_df3 # short_df ps: merged_df3
#----------------- #--------------------------
corr_ps_df3 = corr_ps_df2[!duplicated(corr_ps_df2$Mutation),] corr_ps_df3 = corr_ps_df2[!duplicated(corr_ps_df2$Mutation),]
na_or = sum(is.na(corr_ps_df3$`Log (OR)`)) na_or = sum(is.na(corr_ps_df3$`Log (OR)`))
@ -194,9 +187,9 @@ check2 = nrow(corr_ps_df3) - na_adj_or
#} #}
################################################################################################ ################################################################################################
#=========================== #=================================
# Data for Correlation plots: LIG # Data for Correlation plots: LIG
#=========================== #=================================
table(df_lig$ligand_outcome) table(df_lig$ligand_outcome)
df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
@ -258,7 +251,6 @@ offset = 1
corr_lig_df2 = corr_data_lig[start:end] corr_lig_df2 = corr_data_lig[start:end]
head(corr_lig_df2) head(corr_lig_df2)
#----------------- #-----------------
# short_df lig: merged_df3_lig # short_df lig: merged_df3_lig
#----------------- #-----------------

View file

@ -231,6 +231,221 @@ str(wide_df_or_mult)
position_or_mult = as.numeric(colnames(wide_df_or_mult)) position_or_mult = as.numeric(colnames(wide_df_or_mult))
####################################################################
# Data for Corrplots
####################################################################
# PS section: derives the correlation-plot data frames from merged_df2.
#   reads : merged_df2, merged_df3, merged_df3_comp, drug
#   writes: df_ps, cols_to_select, corr_data_ps, my_corr_colnames,
#           start, end, offset, corr_ps_df2, corr_ps_df3, na_or, check1
cat("\n==========================================",
    "\nCORR PLOTS data: PS",
    "\n===========================================")

df_ps <- merged_df2

#--------------------
# log-transformed columns
#--------------------
df_ps$log10_or_mychisq <- log10(df_ps$or_mychisq)
df_ps$neglog_pval_fisher <- -log10(df_ps$pval_fisher)

## kinship-based columns are currently switched off
## df_ps$log10_or_kin = log10(df_ps$or_kin)
## df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)

# df_ps$mutation_info_labels = ifelse(df_ps$mutation_info == dr_muts_col, 1, 0)

#----------------------------
# columns for corr plots: PS
#----------------------------
# Columns used for the pairwise correlations; the order here must match
# the display names in my_corr_colnames below.
cols_to_select <- c(
  "mutationinformation",
  "duet_scaled",
  "foldx_scaled",
  # "mutation_info_labels",
  "asa",
  "rsa",
  "rd_values",
  "kd_values",
  "log10_or_mychisq",
  "neglog_pval_fisher",
  ## "or_kin",
  ## "neglog_pwald_kin",
  "af",
  ## "af_kin",
  "duet_outcome",
  drug
)

corr_data_ps <- df_ps[cols_to_select]
dim(corr_data_ps)

#--------------------------------------
# display names for the selected columns
#--------------------------------------
my_corr_colnames <- c(
  "Mutation",
  "DUET",
  "Foldx",
  # "Mutation class",
  "ASA",
  "RSA",
  "RD",
  "KD",
  "Log (OR)",
  "-Log (P)",
  ## "Adjusted (OR)",
  ## "-Log (P wald)",
  "MAF",
  ## "AF_kin",
  "duet_outcome",
  drug
)

length(my_corr_colnames)

# show the column names before and after renaming
colnames(corr_data_ps)
colnames(corr_data_ps) <- my_corr_colnames
colnames(corr_data_ps)

# column window to keep: first column up to the drug column
start <- 1
end <- which(colnames(corr_data_ps) == drug)
end # should be the last column
offset <- 1

#===========================
# Corr data for plots: PS
# big_df ps: ~ merged_df2
#===========================
# corr_ps_df2 = corr_data_ps[start:(end-offset)] # without drug
corr_ps_df2 <- corr_data_ps[start:end]
head(corr_ps_df2)

#===========================
# Corr data for plots: PS
# short_df ps: ~merged_df3
#===========================
# keep the first row seen for each Mutation value
corr_ps_df3 <- corr_ps_df2[!duplicated(corr_ps_df2$Mutation), ]

# number of rows with a non-missing `Log (OR)`
na_or <- sum(is.na(corr_ps_df3$`Log (OR)`))
check1 <- nrow(corr_ps_df3) - na_or

## na_adj_or = sum(is.na(corr_ps_df3$`adjusted (OR)`))
## check2 = nrow(corr_ps_df3) - na_adj_or

# Row count is compared with merged_df3, and the non-missing OR count with
# merged_df3_comp; the outcome is only reported, execution continues either way.
if (nrow(corr_ps_df3) != nrow(merged_df3) || nrow(merged_df3_comp) != check1) {
  cat("\nFAIL: Numbers mismatch:",
      "\nExpected nrows: ", nrow(merged_df3),
      "\nGot: ", nrow(corr_ps_df3),
      "\nExpected OR values: ", nrow(merged_df3_comp),
      "\nGot: ", check1)
} else {
  cat("\nPASS: No. of rows for corr_ps_df3 match",
      "\nPASS: No. of OR values checked: ", check1)
}
#=================================
# Data for Correlation plots: LIG
#=================================
# LIG section: derives the correlation-plot data frames from merged_df2_lig.
#   reads : merged_df2_lig, merged_df3_lig, merged_df3_comp_lig, drug
#   writes: df_lig, cols_to_select, corr_data_lig, my_corr_colnames,
#           start, end, offset, corr_lig_df2, corr_lig_df3,
#           na_or_lig, check1_lig
# NOTE: cols_to_select, my_corr_colnames, start, end and offset are reused
# names, so any values assigned earlier in the script are overwritten here.
cat("\n=========================================="
    , "\nCORR PLOTS data: LIG"
    , "\n===========================================")

df_lig = merged_df2_lig

table(df_lig$ligand_outcome)

#--------------------
# adding log cols
#--------------------
df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)

## kinship-based columns are currently switched off
##df_lig$log10_or_kin = log10(df_lig$or_kin)
##df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)

#----------------------------
# columns for corr plots: LIG
#----------------------------
# subset data to generate pairwise correlations; the order here must match
# the display names in my_corr_colnames below
cols_to_select = c("mutationinformation"
                   , "affinity_scaled"
                   #, "mutation_info_labels"
                   , "asa"
                   , "rsa"
                   , "rd_values"
                   , "kd_values"
                   , "log10_or_mychisq"
                   , "neglog_pval_fisher"
                   ##, "or_kin"
                   ##, "neglog_pwald_kin"
                   , "af"
                   ##, "af_kin"
                   , "ligand_outcome"
                   , drug)

corr_data_lig = df_lig[, cols_to_select]

dim(corr_data_lig)

#--------------------------------------
# assign nice colnames (for display)
#--------------------------------------
my_corr_colnames = c("Mutation"
                     , "Ligand Affinity"
                     #, "Mutation class"
                     , "ASA"
                     , "RSA"
                     , "RD"
                     , "KD"
                     , "Log (OR)"
                     , "-Log (P)"
                     ##, "Adjusted (OR)"
                     ##, "-Log (P wald)"
                     , "MAF"
                     ##, "MAF_kin"
                     , "ligand_outcome"
                     , drug)

length(my_corr_colnames)

# show the column names before and after renaming
colnames(corr_data_lig)
colnames(corr_data_lig) <- my_corr_colnames
colnames(corr_data_lig)

# column window to keep: first column up to the drug column
start = 1
end = which(colnames(corr_data_lig) == drug); end # should be the last column
offset = 1

#=============================
# Corr data for plots: LIG
# big_df lig: ~ merged_df2_lig
#==============================
#corr_lig_df2 = corr_data_lig[start:(end-offset)] # without drug
corr_lig_df2 = corr_data_lig[start:end]
head(corr_lig_df2)

#=============================
# Corr data for plots: LIG
# short_df lig: ~ merged_df3_lig
#==============================
# keep the first row seen for each Mutation value
corr_lig_df3 = corr_lig_df2[!duplicated(corr_lig_df2$Mutation),]

# number of rows with a non-missing `Log (OR)`
na_or_lig = sum(is.na(corr_lig_df3$`Log (OR)`))
check1_lig = nrow(corr_lig_df3) - na_or_lig

# Row count is compared with merged_df3_lig, and the non-missing OR count with
# merged_df3_comp_lig; the outcome is only reported, execution continues.
if (nrow(corr_lig_df3) == nrow(merged_df3_lig) && nrow(merged_df3_comp_lig) == check1_lig) {
  cat( "\nPASS: No. of rows for corr_lig_df3 match"
       , "\nPASS: No. of OR values checked: " , check1_lig)
} else {
  # report the actual row count of corr_lig_df3 (the object tested above)
  cat("\nFAIL: Numbers mismatch:"
      , "\nExpected nrows: ", nrow(merged_df3_lig)
      , "\nGot: ", nrow(corr_lig_df3)
      , "\nExpected OR values: ", nrow(merged_df3_comp_lig)
      , "\nGot: ", check1_lig)
}
# Compare each full-width subset with the data frame it was cut from; the
# result is only displayed, not stored. TRUE is expected when the drug column
# is the last column, since start:end then spans every column.
identical(corr_data_lig, corr_lig_df2)
identical(corr_data_ps, corr_ps_df2)

# Remove intermediate objects (whole data frames, not columns) that are no
# longer needed once the corr_*_df2 / corr_*_df3 data frames exist.
rm(list = c("df_ps", "df_lig", "corr_data_ps", "corr_data_lig"))
######################################################################## ########################################################################
# End of script # End of script
######################################################################## ########################################################################
rm(foo)

View file

@ -37,7 +37,7 @@ the df needed to plot graphs. This is run by these ind plottings scripts
like below as cmd where the cmd args return valid dfs used for plots. like below as cmd where the cmd args return valid dfs used for plots.
#=================== #===================
# log_plots.R # logo_plots.R
#=================== #===================
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
./logo_plots.R -d streptomycin -g gid ./logo_plots.R -d streptomycin -g gid
@ -62,6 +62,32 @@ sources:
- fa flag has default if not supplied - fa flag has default if not supplied
- fb flag has default if not supplied - fb flag has default if not supplied
- Error in grid.Call fixed by commenting out image rendering on console - Error in grid.Call fixed by commenting out image rendering on console
#===================
# corr_plots.R
#===================
#-----------------------------------------------------------------------
./corr_plots.R -d streptomycin -g gid
#-----------------------------------------------------------------------
It replaces
## corr_data.R
## corr_PS_LIG.R
These have been moved to redundant/
sources:
## get_plotting_dfs.R
outputs: 4 svgs in the plotdir
## corr_PS.svg
## corr_PS_all.svg
## corr_LIG.svg
## corr_LIG_all.svg
note:
- fa flag has default if not supplied
- fb flag has default if not supplied
- The short df is used here, i.e. derivations from _df3
######################################################################## ########################################################################
# TODO # TODO
Delete: dirs.R Delete: dirs.R