From cf732a3bcc5f90f83c1da621f9793fe76c2d10af Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 10 Sep 2020 16:03:04 +0100 Subject: [PATCH] saving work yet again to be extra sure --- scripts/plotting/combining_dfs_plotting.R | 23 ++++++----------------- scripts/plotting/other_plots_data.R | 7 ------- scripts/plotting/plotting_data.R | 2 +- 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/scripts/plotting/combining_dfs_plotting.R b/scripts/plotting/combining_dfs_plotting.R index 2b4750c..3bf6262 100644 --- a/scripts/plotting/combining_dfs_plotting.R +++ b/scripts/plotting/combining_dfs_plotting.R @@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin)) , "\nNA in AF:", sum(is.na(my_df_u$af_kin))) } -str(gene_metadata) ################################################################### # combining: PS @@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata)) cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)" , "\nNo. of merging cols:", length(merging_cols) - , "\nMerging columns identified:")) + , "\nMerging columns identified:\n")) print(merging_cols) # important checks! @@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata , by = merging_cols , all.y = T) -cat("Dim of merged_df2: ", dim(merged_df2)) +cat("Dim of merged_df2: ", dim(merged_df2), "\n") head(merged_df2$position) # sanity check @@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){ ,"\nExpected no. of rows: ",nrow(gene_metadata) ,"\nGot no. of rows: ", nrow(merged_df2)) } else{ - cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)" + cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)" , "\nExpected no. of rows after merge: ", nrow(gene_metadata) , "\nGot no. of rows: ", nrow(merged_df2) - , "\nFinding discrepancy") + , "\nFinding discrepancy\n") merged_muts_u = unique(merged_df2$mutationinformation) meta_muts_u = unique(gene_metadata$mutationinformation) # find the index where it differs @@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin)) , "\nNA in AF:", sum(is.na(merged_df3$af_kin))) } -# check if the same or and afs are missing for -if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin))) - && identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) - && identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){ - cat("PASS: Indices match for mychisq and kin ors missing values") -} else{ - cat("Index mismatch: mychisq and kin ors missing indices match") - quit() -} - #========================= # Merge3: merged_df2_comp # same as merge 1 but excluding NAs from ORs, etc. @@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){ na_count_df3 = sum(is.na(merged_df3$af)) #merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way -cat("Checking nrows in merged_df3_comp") +cat("\nChecking nrows in merged_df3_comp") if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){ cat("\nPASS: No. of rows match" ,"\nDim of merged_df3_comp: " @@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){ , "\nNo. of rows: ", nrow(merged_df3_comp) , "\nNo. of cols: ", ncol(merged_df3_comp)) }else{ - cat("FAIL: No. of rows mismatch" + cat("\nFAIL: No. of rows mismatch" ,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3 , "\nGot no. of rows: ", nrow(merged_df3_comp)) } diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R index 8f6836a..9c54e9a 100644 --- a/scripts/plotting/other_plots_data.R +++ b/scripts/plotting/other_plots_data.R @@ -15,13 +15,6 @@ library(data.table) library(dplyr) source("combining_dfs_plotting.R") -#======= -# output -#======= -#lineage_dist_combined = "lineage_dist_combined_PS.svg" -#plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined) - - rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig , merged_df3_comp, merged_df3_comp_lig , my_df_u, my_df_u_lig) diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R index 291579e..dd5760e 100755 --- a/scripts/plotting/plotting_data.R +++ b/scripts/plotting/plotting_data.R @@ -97,7 +97,7 @@ table(my_df_u$ligand_distance<10) my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,] angstroms_symbol = "\u212b" -cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand")) +cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n")) ######################################################################## # end of data extraction and cleaning for plots #