Added shorter scripts for each type of plot processing to make the code easier to read
This commit is contained in:
parent
96129ddc34
commit
dc5b5e2f11
6 changed files with 1292 additions and 0 deletions
67
scripts/plotting/corr_data.R
Normal file
67
scripts/plotting/corr_data.R
Normal file
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for corr plots
|
||||
#########################################################
|
||||
|
||||
#=================================================
|
||||
# Data for Corrplots
|
||||
#=================================================
|
||||
# Announce the start of the correlation-data section on the console.
cat(
  "\n==========================================",
  "\nCORR PLOTS data: ALL params",
  "\n========================================="
)
|
||||
|
||||
# use data
|
||||
#merged_df2
|
||||
|
||||
#----------------------------
|
||||
# columns for corr plots:PS
|
||||
#----------------------------
|
||||
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
|
||||
# Column names of merged_df2, stored as a one-column data frame.
big_df_colnames <- data.frame(names(merged_df2))

# Columns wanted for the correlation data.
# NOTE(review): `drug` is an unquoted variable that is not defined in this
# script -- presumably set by the calling script; confirm it holds the name of
# the drug column.
corr_cols_select <- c(
  "mutationinformation", drug, "mutation_info_labels",
  "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa",
  "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher", "af",
  "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity",
  "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance"
)
|
||||
|
||||
#===========================
|
||||
# Corr data for plots: PS
|
||||
# big_df ps: ~ merged_df2
|
||||
#===========================
|
||||
|
||||
# Keep only the columns of merged_df2 whose names appear in corr_cols_select;
# requested names that are absent from merged_df2 are simply not selected.
# drop = FALSE keeps the result a data frame even when exactly one column
# matches (without it the subset would collapse to a bare vector).
corr_df_m2 <- merged_df2[, colnames(merged_df2) %in% corr_cols_select, drop = FALSE]
|
||||
|
||||
#===========================
|
||||
# Corr data for plots: PS
|
||||
# short_df ps: ~merged_df3
|
||||
#===========================
|
||||
|
||||
# One row per mutation: keep the first occurrence of each mutationinformation
# value. drop = FALSE keeps a data frame even if corr_df_m2 has a single
# column, so nrow() below stays valid.
corr_df_m3 <- corr_df_m2[!duplicated(corr_df_m2$mutationinformation), , drop = FALSE]

# Count rows whose log10_or_mychisq is missing, then the rows that do have it.
na_or <- sum(is.na(corr_df_m3$log10_or_mychisq))
check1 <- nrow(corr_df_m3) - na_or
check1  # echoed when the script is run at top level
|
||||
|
||||
# Sanity check: corr_df_m3 must have as many rows as merged_df3, and the number
# of non-missing OR values (check1) must equal the row count of
# merged_df3_comp. The outcome is only reported; the script carries on either
# way.
if (nrow(corr_df_m3) != nrow(merged_df3) || nrow(merged_df3_comp) != check1) {
  cat(
    "\nFAIL: Numbers mismatch:",
    "\nExpected nrows: ", nrow(merged_df3),
    "\nGot: ", nrow(corr_df_m3),
    "\nExpected OR values: ", nrow(merged_df3_comp),
    "\nGot: ", check1
  )
} else {
  cat(
    "\nPASS: No. of rows for corr_df_m3 match",
    "\nPASS: No. of OR values checked: ", check1
  )
}
|
||||
|
||||
# Report the dimensions of both correlation data frames built above.
cat(
  "\nCorr Data created:",
  "\n===================================",
  "\ncorr_df_m2: created from merged_df2",
  "\n===================================",
  "\nnrows:", nrow(corr_df_m2),
  "\nncols:", ncol(corr_df_m2),
  "\n===================================",
  "\ncorr_df_m3: created from merged_df3",
  "\n===================================",
  "\nnrows:", nrow(corr_df_m3),
  "\nncols:", ncol(corr_df_m3)
)
|
Loading…
Add table
Add a link
Reference in a new issue