ml df stuff
This commit is contained in:
parent
c2b46286d8
commit
82e2da4f3b
4 changed files with 815 additions and 26 deletions
|
@ -40,11 +40,12 @@ geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
|||
|
||||
|
||||
combining_dfs_plotting <- function( my_df_u
|
||||
, gene_metadata
|
||||
#, gene # ADDED
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = ''){
|
||||
|
||||
, gene_metadata
|
||||
#, gene # ADDED
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = ''
|
||||
, plotting = TRUE){
|
||||
|
||||
# counting NAs in AF, OR cols
|
||||
# or_mychisq
|
||||
if (identical(sum(is.na(my_df_u$or_mychisq))
|
||||
|
@ -117,7 +118,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, y = my_df_u
|
||||
, by = merging_cols
|
||||
, all.y = T)
|
||||
#, all.x = T)
|
||||
#, all.x = T)
|
||||
|
||||
cat("\nDim of merged_df2: ", dim(merged_df2))
|
||||
|
||||
|
@ -182,13 +183,13 @@ combining_dfs_plotting <- function( my_df_u
|
|||
,"\nExpected no. of rows: ", expected_nrows_df2
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
}else{ cat("\nFAIL: nrow(merged_df2) is NOT as expected even after accounting for discrepancy"
|
||||
, "\nExpected no. of rows after merge: ", expected_nrows_df2
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nQuitting!")
|
||||
, "\nExpected no. of rows after merge: ", expected_nrows_df2
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nQuitting!")
|
||||
quit()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Quick formatting: ordering df and pretty labels
|
||||
|
@ -332,7 +333,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
}else{
|
||||
stop("Cannot generate merged_df3")
|
||||
}
|
||||
##################################################################
|
||||
##################################################################
|
||||
head(merged_df3$position); tail(merged_df3$position) # should be sorted
|
||||
|
||||
# sanity check
|
||||
|
@ -357,7 +358,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
merged_df3[[consurf_colNew]] = as.factor(merged_df3[[consurf_colNew]])
|
||||
merged_df3[[consurf_colNew]]
|
||||
#levels(merged_df3$consurf_outcome) = c("nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||
|
||||
|
||||
merged_df2[[consurf_colNew]] = merged_df2[[consurf_colOld]]
|
||||
merged_df2[[consurf_colNew]] = as.factor(merged_df2[[consurf_colNew]])
|
||||
merged_df2[[consurf_colNew]]
|
||||
|
@ -378,7 +379,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
#----------------------------------------------
|
||||
merged_df3$sensitivity = ifelse(merged_df3$dst_mode == 1, "R", "S")
|
||||
merged_df3$mutation_info_labels = ifelse(merged_df3$mutation_info_labels == "DM", "R", "S")
|
||||
|
||||
|
||||
merged_df2$sensitivity = ifelse(merged_df2$dst_mode == 1, "R", "S")
|
||||
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info_labels == "DM", "R", "S")
|
||||
|
||||
|
@ -387,7 +388,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
|
||||
check1 = all(merged_df3$mutation_info_labels == merged_df3$sensitivity)
|
||||
check2 = all(merged_df2$mutation_info_labels == merged_df2$sensitivity)
|
||||
|
||||
|
||||
if(check1 && check2){
|
||||
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"
|
||||
, "\nIt also has sensitivity column"
|
||||
|
@ -420,9 +421,9 @@ combining_dfs_plotting <- function( my_df_u
|
|||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
scaled_cols_stab = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
)
|
||||
|
||||
#-----------------------------------------------
|
||||
|
@ -701,7 +702,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# merged_df3$pos_count <-NULL
|
||||
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
|
||||
head(merged_df3$pos_count)
|
||||
|
||||
|
||||
merged_df3 = merged_df3 %>%
|
||||
dplyr::add_count(position)
|
||||
class(merged_df3)
|
||||
|
@ -710,7 +711,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
nc_change = which(colnames(merged_df3) == "n")
|
||||
colnames(merged_df3)[nc_change] <- "pos_count"
|
||||
class(merged_df3)
|
||||
|
||||
|
||||
####################################################################
|
||||
#-------------------------------------------------
|
||||
# merged_df2: Rename existing pos_count
|
||||
|
@ -726,14 +727,14 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# already done in plotting_data
|
||||
####################################################################
|
||||
# Choose few columns to return as plot_df
|
||||
|
||||
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
|
||||
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
|
||||
|
||||
if (plotting){
|
||||
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
|
||||
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
|
||||
}
|
||||
####################################################################
|
||||
return(list( merged_df2
|
||||
, merged_df3
|
||||
))
|
||||
|
||||
cat("\nEnd of combining_dfs_plotting.R script")
|
||||
cat("\nEnd of combining_dfs_plotting.R script")
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue