saving work yet again to be extra sure

2020-09-10 16:03:04 +01:00 · 2020-09-10 16:03:04 +01:00 · cf732a3bcc
commit cf732a3bcc
parent 65841e4f5b
3 changed files with 7 additions and 25 deletions
--- a/scripts/plotting/combining_dfs_plotting.R
+++ b/scripts/plotting/combining_dfs_plotting.R
@@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin))
      , "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
 }

-str(gene_metadata)

 ###################################################################
 #                           combining: PS
@@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata))

 cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
           , "\nNo. of merging cols:", length(merging_cols)
-           , "\nMerging columns identified:"))
+           , "\nMerging columns identified:\n"))
 print(merging_cols)

 # important checks!
@@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata
                  , by = merging_cols
                  , all.y = T)

-cat("Dim of merged_df2: ", dim(merged_df2))
+cat("Dim of merged_df2: ", dim(merged_df2), "\n")
 head(merged_df2$position)

 # sanity check 
@@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){
      ,"\nExpected no. of rows: ",nrow(gene_metadata) 
      ,"\nGot no. of rows: ", nrow(merged_df2))
 } else{
-  cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
+  cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
      , "\nExpected no. of rows after merge: ", nrow(gene_metadata)
      , "\nGot no. of rows: ", nrow(merged_df2)
-      , "\nFinding discrepancy")
+      , "\nFinding discrepancy\n")
  merged_muts_u = unique(merged_df2$mutationinformation)
  meta_muts_u = unique(gene_metadata$mutationinformation)
    # find the index where it differs
@@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin))
      , "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
 }

-# check if the same or and afs are missing for 
-if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
-  && identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
-  && identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
-  cat("PASS: Indices match for mychisq and kin ors missing values")
-} else{
-  cat("Index mismatch: mychisq and kin ors missing indices match")
-  quit()
-}
-
 #=========================
 # Merge3: merged_df2_comp
 # same as merge 1 but excluding NAs from ORs, etc.
@@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
 na_count_df3 = sum(is.na(merged_df3$af))
 #merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
 merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
-cat("Checking nrows in merged_df3_comp")
+cat("\nChecking nrows in merged_df3_comp")
 if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
  cat("\nPASS: No. of rows match"
      ,"\nDim of merged_df3_comp: "
@@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
      , "\nNo. of rows: ", nrow(merged_df3_comp)
      , "\nNo. of cols: ", ncol(merged_df3_comp))
 }else{
-  cat("FAIL: No. of rows mismatch"
+  cat("\nFAIL: No. of rows mismatch"
      ,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
      , "\nGot no. of rows: ", nrow(merged_df3_comp))
 }
--- a/scripts/plotting/other_plots_data.R
+++ b/scripts/plotting/other_plots_data.R
@@ -15,13 +15,6 @@ library(data.table)
 library(dplyr)
 source("combining_dfs_plotting.R")

-#=======
-# output
-#=======
-#lineage_dist_combined = "lineage_dist_combined_PS.svg"
-#plot_lineage_dist_combined  =  paste0(plotdir,"/", lineage_dist_combined)
-
-
 rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
   , merged_df3_comp, merged_df3_comp_lig
   , my_df_u, my_df_u_lig)
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@@ -97,7 +97,7 @@ table(my_df_u$ligand_distance<10)

 my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
 angstroms_symbol = "\u212b"
-cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand"))
+cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))

 ########################################################################
 #               end of data extraction and cleaning for plots          #