git trimmed down the dm_om_data.R

This commit is contained in:
Tanushree Tunstall 2022-08-05 14:36:02 +01:00
parent fae846395d
commit 05ab89ec09
5 changed files with 168 additions and 351 deletions

View file

@ -343,20 +343,45 @@ combining_dfs_plotting <- function( my_df_u
, "\nNo. of rows merged_df3: ", nrow(merged_df3))
quit()
}
#---------------------------------------------
# add columns that are needed to generate plots with revised colnames and strings
#----------------------------------------------
merged_df3['sensitivity'] = ifelse(merged_df3['dst_mode'] == 1, "R", "S")
merged_df3['mutation_info_labels'] = ifelse(merged_df3['mutation_info_labels'] == "DM", "R", "S")
#=========================================
# NEW: add consurf outcome
#=========================================
consurf_colOld = "consurf_colour_rev"
consurf_colNew = "consurf_outcome"
merged_df3[[consurf_colNew]] = merged_df3[[consurf_colOld]]
merged_df3[[consurf_colNew]] = as.factor(merged_df3[[consurf_colNew]])
merged_df3[[consurf_colNew]]
#levels(merged_df3$consurf_outcome) = c("nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
merged_df2['sensitivity'] = ifelse(merged_df2['dst_mode'] == 1, "R", "S")
merged_df2['mutation_info_labels'] = ifelse(merged_df2['mutation_info_labels'] == "DM", "R", "S")
merged_df2[[consurf_colNew]] = merged_df2[[consurf_colOld]]
merged_df2[[consurf_colNew]] = as.factor(merged_df2[[consurf_colNew]])
merged_df2[[consurf_colNew]]
#check1 = all(table(merged_df3["mutation_info_labels"]) == table(merged_df3['sensitivity']))
#check2 = all(table(merged_df2["mutation_info_labels"]) == table(merged_df2['sensitivity']))
#=========================================
# NEW: fixed case for SNAP2 labels
#=========================================
snap2_colname = "snap2_outcome"
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "effect", "Effect")
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "neutral", "Neutral")
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "effect", "Effect")
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "neutral", "Neutral")
#---------------------------------------------
# NEW: add columns that are needed to generate
# plots with revised colnames and strings
#----------------------------------------------
merged_df3$sensitivity = ifelse(merged_df3$dst_mode == 1, "R", "S")
merged_df3$mutation_info_labels = ifelse(merged_df3$mutation_info_labels == "DM", "R", "S")
check1 = all(merged_df3["mutation_info_labels"] == merged_df3['sensitivity'])
check2 = all(merged_df2["mutation_info_labels"] == merged_df2['sensitivity'])
merged_df2$sensitivity = ifelse(merged_df2$dst_mode == 1, "R", "S")
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info_labels == "DM", "R", "S")
# for epistasis: fill na where dst: No equivalent in merged_df3
merged_df2$dst2 = ifelse(is.na(merged_df2$dst), merged_df2$dst_mode, merged_df2$dst)
check1 = all(merged_df3$mutation_info_labels == merged_df3$sensitivity)
check2 = all(merged_df2$mutation_info_labels == merged_df2$sensitivity)
if(check1 && check2){
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"