changes made to combining_dfs_plotting.R

This commit is contained in:
Tanushree Tunstall 2021-06-23 16:15:15 +01:00
parent c6d1260f74
commit 2aec79af31
9 changed files with 258 additions and 126 deletions

View file

@ -2,11 +2,12 @@
###########################################################
# TASK: To combine the mcsm combined file and the gene metadata.
# This script is sourced from other .R scripts for plotting.
# This script is sourced by other .R scripts for plotting.
###########################################################
# load libraries and functions
#source("Header_TT.R")
#==========================================================
# combining_dfs_plotting():
@ -34,28 +35,7 @@ combining_dfs_plotting <- function( my_df_u
, gene_metadata
, lig_dist_colname = 'ligand_distance'
, lig_dist_cutoff = 10){
# #======================================
# # 1: Read file: <gene>_meta data.csv
# #======================================
# cat("\nReading meta data file:", df1_mcsm_comb)
#
# my_df_u <- read.csv(df1_mcsm_comb
# , stringsAsFactors = F
# , header = T)
# cat("\nDim:", dim(my_df_u))
#
# #======================================
# # 2: Read file: <gene>_meta data.csv
# #======================================
# cat("\nReading meta data file:", df2_gene_metadata)
#
# gene_metadata <- read.csv(df2_gene_metadata
# , stringsAsFactors = F
# , header = T)
# cat("\nDim:", dim(gene_metadata))
#
# table(gene_metadata$mutation_info)
# counting NAs in AF, OR cols
# or_mychisq
if (identical(sum(is.na(my_df_u$or_mychisq))
@ -219,7 +199,6 @@ combining_dfs_plotting <- function( my_df_u
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
}
#===================================================
# Merge3: merged_df2_comp
# same as merge 1 but excluding NAs from ORs, etc.
@ -273,6 +252,13 @@ combining_dfs_plotting <- function( my_df_u
# compare dfs: foo and merged_df3_comp
all.equal(foo, bar)
#summary(comparedf(foo, bar))
cat("\n------------------------"
, "\nSummary of created dfs:"
, "\n------------------------"
, "\n1) Dim of merged_df2: " , nrow(merged_df2), "," , ncol(merged_df2)
, "\n2) Dim of merged_df2_comp: " , nrow(merged_df2_comp), "," , ncol(merged_df2_comp)
, "\n3) Dim of merged_df3: " , nrow(merged_df3), "," , ncol(merged_df3)
, "\n4) Dim of merged_df3_comp: " , nrow(merged_df3_comp), "," , ncol(merged_df3_comp))
#####################################################################
# Combining: LIG
@ -281,13 +267,35 @@ combining_dfs_plotting <- function( my_df_u
#============
# Merges 5-8
#============
df_lig = my_df_u[my_df_u[[lig_dist_colname]]<lig_dist_cutoff,]
cat("\n=========================================="
, "\nStarting filtering for mcsm ligand df"
, "\n===========================================")
merged_df2_lig = merged_df2[merged_df2$ligand_distance<lig_dist_cutoff,]
merged_df2_comp_lig = merged_df2_comp[merged_df2_comp$ligand_distance<lig_dist_cutoff,]
if (lig_dist_colname%in%names(my_df_u)){
cat("\nFiltering column: ", lig_dist_colname
, "\nCut off criteria: ", lig_dist_cutoff, "Angstroms")
df_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
merged_df3_lig = merged_df3[merged_df3$ligand_distance<lig_dist_cutoff,]
merged_df3_comp_lig = merged_df3_comp[merged_df3_comp$ligand_distance<lig_dist_cutoff,]
#merged_df2_lig = merged_df2[merged_df2$ligand_distance<lig_dist_cutoff,]
merged_df2_lig = merged_df2[merged_df2[[lig_dist_colname]] < lig_dist_cutoff,]
dim(merged_df2_lig)
merged_df2_comp_lig = merged_df2_comp[merged_df2_comp[[lig_dist_colname]] < lig_dist_cutoff,]
merged_df3_lig = merged_df3[merged_df3[[lig_dist_colname]] < lig_dist_cutoff,]
merged_df3_comp_lig = merged_df3_comp[merged_df3_comp[[lig_dist_colname]] < lig_dist_cutoff,]
cat("\n------------------------"
, "\nSummary of created ligand dfs:"
, "\n------------------------"
, "\n1) Dim of merged_df2_lig: " , nrow(merged_df2_lig), "," , ncol(merged_df2_lig)
, "\n2) Dim of merged_df2_comp_lig: " , nrow(merged_df2_comp_lig), "," , ncol(merged_df2_comp_lig)
, "\n3) Dim of merged_df3_lig: " , nrow(merged_df3_lig), "," , ncol(merged_df3_lig)
, "\n4) Dim of merged_df3_comp_lig: " , nrow(merged_df3_comp_lig), "," , ncol(merged_df3_comp_lig))
} else {
cat("\nFiltering column: ", lig_dist_colname, " not found\n")
}
#quit()
# sanity check
if (nrow(merged_df3_lig) == nrow(df_lig)){
@ -297,7 +305,7 @@ combining_dfs_plotting <- function( my_df_u
, "\nExpected:", nrow(df_lig)
, "\nGot:", nrow(merged_df3_lig)))
}
#==============================================================
############################################
@ -327,8 +335,13 @@ combining_dfs_plotting <- function( my_df_u
# , "\nNo. of cols: ", ncol(get(i)), "\n")
#}
return(list(merged_df2, merged_df3
, merged_df2_comp, merged_df3_comp
, merged_df2_lig, merged_df3_lig))
return(list( merged_df2
, merged_df3
, merged_df2_comp
, merged_df3_comp
, merged_df2_lig
, merged_df3_lig
, merged_df2_comp_lig
, merged_df3_comp_lig))
}