changes made to combining_dfs_plotting.R
This commit is contained in:
parent
c6d1260f74
commit
2aec79af31
9 changed files with 258 additions and 126 deletions
|
@ -2,11 +2,12 @@
|
|||
|
||||
###########################################################
|
||||
# TASK: To combine the mcsm combined file with the gene metadata.
|
||||
# This script is sourced from other .R scripts for plotting.
|
||||
# This script is sourced by other .R scripts for plotting.
|
||||
###########################################################
|
||||
|
||||
# load libraries and functions
|
||||
|
||||
#source("Header_TT.R")
|
||||
|
||||
#==========================================================
|
||||
# combining_dfs_plotting():
|
||||
|
||||
|
@ -34,28 +35,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, gene_metadata
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10){
|
||||
# #======================================
|
||||
# # 1: Read file: <gene>_meta data.csv
|
||||
# #======================================
|
||||
# cat("\nReading meta data file:", df1_mcsm_comb)
|
||||
#
|
||||
# my_df_u <- read.csv(df1_mcsm_comb
|
||||
# , stringsAsFactors = F
|
||||
# , header = T)
|
||||
# cat("\nDim:", dim(my_df_u))
|
||||
#
|
||||
# #======================================
|
||||
# # 2: Read file: <gene>_meta data.csv
|
||||
# #======================================
|
||||
# cat("\nReading meta data file:", df2_gene_metadata)
|
||||
#
|
||||
# gene_metadata <- read.csv(df2_gene_metadata
|
||||
# , stringsAsFactors = F
|
||||
# , header = T)
|
||||
# cat("\nDim:", dim(gene_metadata))
|
||||
#
|
||||
# table(gene_metadata$mutation_info)
|
||||
|
||||
|
||||
# counting NAs in AF, OR cols
|
||||
# or_mychisq
|
||||
if (identical(sum(is.na(my_df_u$or_mychisq))
|
||||
|
@ -219,7 +199,6 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
||||
}
|
||||
|
||||
|
||||
#===================================================
|
||||
# Merge3: merged_df2_comp
|
||||
# same as merge 1 but excluding NAs from ORs, etc.
|
||||
|
@ -273,6 +252,13 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# compare dfs: foo and merged_df3_comp
|
||||
all.equal(foo, bar)
|
||||
#summary(comparedf(foo, bar))
|
||||
cat("\n------------------------"
|
||||
, "\nSummary of created dfs:"
|
||||
, "\n------------------------"
|
||||
, "\n1) Dim of merged_df2: " , nrow(merged_df2), "," , ncol(merged_df2)
|
||||
, "\n2) Dim of merged_df2_comp: " , nrow(merged_df2_comp), "," , ncol(merged_df2_comp)
|
||||
, "\n3) Dim of merged_df3: " , nrow(merged_df3), "," , ncol(merged_df3)
|
||||
, "\n4) Dim of merged_df3_comp: " , nrow(merged_df3_comp), "," , ncol(merged_df3_comp))
|
||||
|
||||
#####################################################################
|
||||
# Combining: LIG
|
||||
|
@ -281,13 +267,35 @@ combining_dfs_plotting <- function( my_df_u
|
|||
#============
|
||||
# Merges 5-8
|
||||
#============
|
||||
df_lig = my_df_u[my_df_u[[lig_dist_colname]]<lig_dist_cutoff,]
|
||||
cat("\n=========================================="
|
||||
, "\nStarting filtering for mcsm ligand df"
|
||||
, "\n===========================================")
|
||||
|
||||
merged_df2_lig = merged_df2[merged_df2$ligand_distance<lig_dist_cutoff,]
|
||||
merged_df2_comp_lig = merged_df2_comp[merged_df2_comp$ligand_distance<lig_dist_cutoff,]
|
||||
if (lig_dist_colname%in%names(my_df_u)){
|
||||
cat("\nFiltering column: ", lig_dist_colname
|
||||
, "\nCut off criteria: ", lig_dist_cutoff, "Angstroms")
|
||||
df_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
merged_df3_lig = merged_df3[merged_df3$ligand_distance<lig_dist_cutoff,]
|
||||
merged_df3_comp_lig = merged_df3_comp[merged_df3_comp$ligand_distance<lig_dist_cutoff,]
|
||||
#merged_df2_lig = merged_df2[merged_df2$ligand_distance<lig_dist_cutoff,]
|
||||
merged_df2_lig = merged_df2[merged_df2[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
dim(merged_df2_lig)
|
||||
|
||||
merged_df2_comp_lig = merged_df2_comp[merged_df2_comp[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
merged_df3_lig = merged_df3[merged_df3[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
merged_df3_comp_lig = merged_df3_comp[merged_df3_comp[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
cat("\n------------------------"
|
||||
, "\nSummary of created ligand dfs:"
|
||||
, "\n------------------------"
|
||||
, "\n1) Dim of merged_df2_lig: " , nrow(merged_df2_lig), "," , ncol(merged_df2_lig)
|
||||
, "\n2) Dim of merged_df2_comp_lig: " , nrow(merged_df2_comp_lig), "," , ncol(merged_df2_comp_lig)
|
||||
, "\n3) Dim of merged_df3_lig: " , nrow(merged_df3_lig), "," , ncol(merged_df3_lig)
|
||||
, "\n4) Dim of merged_df3_comp_lig: " , nrow(merged_df3_comp_lig), "," , ncol(merged_df3_comp_lig))
|
||||
} else {
|
||||
cat("\nFiltering column: ", lig_dist_colname, " not found\n")
|
||||
}
|
||||
#quit()
|
||||
|
||||
# sanity check
|
||||
if (nrow(merged_df3_lig) == nrow(df_lig)){
|
||||
|
@ -297,7 +305,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, "\nExpected:", nrow(df_lig)
|
||||
, "\nGot:", nrow(merged_df3_lig)))
|
||||
}
|
||||
|
||||
|
||||
#==============================================================
|
||||
|
||||
############################################
|
||||
|
@ -327,8 +335,13 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# , "\nNo. of cols: ", ncol(get(i)), "\n")
|
||||
#}
|
||||
|
||||
return(list(merged_df2, merged_df3
|
||||
, merged_df2_comp, merged_df3_comp
|
||||
, merged_df2_lig, merged_df3_lig))
|
||||
return(list( merged_df2
|
||||
, merged_df3
|
||||
, merged_df2_comp
|
||||
, merged_df3_comp
|
||||
, merged_df2_lig
|
||||
, merged_df3_lig
|
||||
, merged_df2_comp_lig
|
||||
, merged_df3_comp_lig))
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue