From cf732a3bcc5f90f83c1da621f9793fe76c2d10af Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Thu, 10 Sep 2020 16:03:04 +0100
Subject: [PATCH] plotting scripts: add newlines to cat() output, drop debug str() and NA-index check

---
 scripts/plotting/combining_dfs_plotting.R | 23 ++++++-----------------
 scripts/plotting/other_plots_data.R       |  7 -------
 scripts/plotting/plotting_data.R          |  2 +-
 3 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/scripts/plotting/combining_dfs_plotting.R b/scripts/plotting/combining_dfs_plotting.R
index 2b4750c..3bf6262 100644
--- a/scripts/plotting/combining_dfs_plotting.R
+++ b/scripts/plotting/combining_dfs_plotting.R
@@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin))
       , "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
 }
 
-str(gene_metadata)
 
 ###################################################################
 #                           combining: PS
@@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata))
 
 cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
            , "\nNo. of merging cols:", length(merging_cols)
-           , "\nMerging columns identified:"))
+           , "\nMerging columns identified:\n"))
 print(merging_cols)
 
 # important checks!
@@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata
                   , by = merging_cols
                   , all.y = T)
 
-cat("Dim of merged_df2: ", dim(merged_df2))
+cat("Dim of merged_df2: ", dim(merged_df2), "\n")
 head(merged_df2$position)
 
 # sanity check 
@@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){
       ,"\nExpected no. of rows: ",nrow(gene_metadata) 
       ,"\nGot no. of rows: ", nrow(merged_df2))
 } else{
-  cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
+  cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
       , "\nExpected no. of rows after merge: ", nrow(gene_metadata)
       , "\nGot no. of rows: ", nrow(merged_df2)
-      , "\nFinding discrepancy")
+      , "\nFinding discrepancy\n")
   merged_muts_u = unique(merged_df2$mutationinformation)
   meta_muts_u = unique(gene_metadata$mutationinformation)
     # find the index where it differs
@@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin))
       , "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
 }
 
-# check if the same or and afs are missing for 
-if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
-  && identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
-  && identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
-  cat("PASS: Indices match for mychisq and kin ors missing values")
-} else{
-  cat("Index mismatch: mychisq and kin ors missing indices match")
-  quit()
-}
-
 #=========================
 # Merge3: merged_df2_comp
 # same as merge 1 but excluding NAs from ORs, etc.
@@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
 na_count_df3 = sum(is.na(merged_df3$af))
 #merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
 merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
-cat("Checking nrows in merged_df3_comp")
+cat("\nChecking nrows in merged_df3_comp")
 if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
   cat("\nPASS: No. of rows match"
       ,"\nDim of merged_df3_comp: "
@@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
       , "\nNo. of rows: ", nrow(merged_df3_comp)
       , "\nNo. of cols: ", ncol(merged_df3_comp))
 }else{
-  cat("FAIL: No. of rows mismatch"
+  cat("\nFAIL: No. of rows mismatch"
       ,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
       , "\nGot no. of rows: ", nrow(merged_df3_comp))
 }
diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R
index 8f6836a..9c54e9a 100644
--- a/scripts/plotting/other_plots_data.R
+++ b/scripts/plotting/other_plots_data.R
@@ -15,13 +15,6 @@ library(data.table)
 library(dplyr)
 source("combining_dfs_plotting.R")
 
-#=======
-# output
-#=======
-#lineage_dist_combined = "lineage_dist_combined_PS.svg"
-#plot_lineage_dist_combined  =  paste0(plotdir,"/", lineage_dist_combined)
-
-
 rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
    , merged_df3_comp, merged_df3_comp_lig
    , my_df_u, my_df_u_lig)
diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R
index 291579e..dd5760e 100755
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@@ -97,7 +97,7 @@ table(my_df_u$ligand_distance<10)
 
 my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
 angstroms_symbol = "\u212b"
-cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand"))
+cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
 
 ########################################################################
 #               end of data extraction and cleaning for plots          #