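ml df stuff: add `plotting` argument (default TRUE) to combining_dfs_plotting() so the plotting-column subset of merged_df2/merged_df3 can be skipped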

This commit is contained in:
Tanushree Tunstall 2022-09-01 11:39:11 +01:00
parent c2b46286d8
commit 82e2da4f3b
4 changed files with 815 additions and 26 deletions

View file

@ -40,11 +40,12 @@ geneL_ppi2 = c("alr", "embb", "katg", "rpob")
combining_dfs_plotting <- function( my_df_u
, gene_metadata
#, gene # ADDED
, lig_dist_colname = ''
, lig_dist_cutoff = ''){
, gene_metadata
#, gene # ADDED
, lig_dist_colname = ''
, lig_dist_cutoff = ''
, plotting = TRUE){
# counting NAs in AF, OR cols
# or_mychisq
if (identical(sum(is.na(my_df_u$or_mychisq))
@ -117,7 +118,7 @@ combining_dfs_plotting <- function( my_df_u
, y = my_df_u
, by = merging_cols
, all.y = T)
#, all.x = T)
#, all.x = T)
cat("\nDim of merged_df2: ", dim(merged_df2))
@ -182,13 +183,13 @@ combining_dfs_plotting <- function( my_df_u
,"\nExpected no. of rows: ", expected_nrows_df2
,"\nGot no. of rows: ", nrow(merged_df2))
}else{ cat("\nFAIL: nrow(merged_df2) is NOT as expected even after accounting for discrepancy"
, "\nExpected no. of rows after merge: ", expected_nrows_df2
, "\nGot no. of rows: ", nrow(merged_df2)
, "\nQuitting!")
, "\nExpected no. of rows after merge: ", expected_nrows_df2
, "\nGot no. of rows: ", nrow(merged_df2)
, "\nQuitting!")
quit()
}
}
}
# Quick formatting: ordering df and pretty labels
@ -332,7 +333,7 @@ combining_dfs_plotting <- function( my_df_u
}else{
stop("Cannot generate merged_df3")
}
##################################################################
##################################################################
head(merged_df3$position); tail(merged_df3$position) # should be sorted
# sanity check
@ -357,7 +358,7 @@ combining_dfs_plotting <- function( my_df_u
merged_df3[[consurf_colNew]] = as.factor(merged_df3[[consurf_colNew]])
merged_df3[[consurf_colNew]]
#levels(merged_df3$consurf_outcome) = c("nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
merged_df2[[consurf_colNew]] = merged_df2[[consurf_colOld]]
merged_df2[[consurf_colNew]] = as.factor(merged_df2[[consurf_colNew]])
merged_df2[[consurf_colNew]]
@ -378,7 +379,7 @@ combining_dfs_plotting <- function( my_df_u
#----------------------------------------------
merged_df3$sensitivity = ifelse(merged_df3$dst_mode == 1, "R", "S")
merged_df3$mutation_info_labels = ifelse(merged_df3$mutation_info_labels == "DM", "R", "S")
merged_df2$sensitivity = ifelse(merged_df2$dst_mode == 1, "R", "S")
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info_labels == "DM", "R", "S")
@ -387,7 +388,7 @@ combining_dfs_plotting <- function( my_df_u
check1 = all(merged_df3$mutation_info_labels == merged_df3$sensitivity)
check2 = all(merged_df2$mutation_info_labels == merged_df2$sensitivity)
if(check1 && check2){
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"
, "\nIt also has sensitivity column"
@ -420,9 +421,9 @@ combining_dfs_plotting <- function( my_df_u
# find which stability cols to average: should contain revised foldx
scaled_cols_stab = c("duet_scaled"
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
, "foldx_scaled_signC" # needed to get avg stability
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
, "foldx_scaled_signC" # needed to get avg stability
)
#-----------------------------------------------
@ -701,7 +702,7 @@ combining_dfs_plotting <- function( my_df_u
# merged_df3$pos_count <-NULL
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
head(merged_df3$pos_count)
merged_df3 = merged_df3 %>%
dplyr::add_count(position)
class(merged_df3)
@ -710,7 +711,7 @@ combining_dfs_plotting <- function( my_df_u
nc_change = which(colnames(merged_df3) == "n")
colnames(merged_df3)[nc_change] <- "pos_count"
class(merged_df3)
####################################################################
#-------------------------------------------------
# merged_df2: Rename existing pos_count
@ -726,14 +727,14 @@ combining_dfs_plotting <- function( my_df_u
# already done in plotting_data
####################################################################
# Choose few columns to return as plot_df
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
if (plotting){
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
}
####################################################################
return(list( merged_df2
, merged_df3
))
cat("\nEnd of combining_dfs_plotting.R script")
cat("\nEnd of combining_dfs_plotting.R script")
}