various changes

2020-09-08 17:13:02 +01:00 · 2020-09-08 17:13:02 +01:00 · fe49a45447
commit fe49a45447
parent 5d9561f88a
3 changed files with 199 additions and 95 deletions
--- a/scripts/plotting/combining_dfs_plotting.R
+++ b/scripts/plotting/combining_dfs_plotting.R
@ -58,6 +58,20 @@ rm(my_df, upos, dup_muts)
 #in_file1: output of plotting_data.R
 # my_df_u

+# quick checks
+head(my_df_u[, c("mutation", "mutation2")])
+
+cols_to_extract  = c("mutationinformation", "mutation", "or_mychisq", "or_kin", "af", "af_kin")
+foo = my_df_u[, colnames(my_df_u)%in% cols_to_extract]
+
+
+identical(which(is.na(my_df_u$af_kin)), which(is.na(my_df_u$af)))  # single TRUE/FALSE: do af_kin and af have NAs at the same rows? (== would recycle element-wise, and gives logical(0) when there are no NAs)
+
+
+baz = cbind(my_df_u$mutation, my_df_u$or_mychisq, bar$mutation, bar$or_mychisq)
+colnames(baz) = c("my_df_u_muts", "my_df_u_or", "real_muts", "real_or")
+
+
 # infile 2: gene associated meta data
 #in_filename_gene_metadata = paste0(tolower(gene),  "_meta_data_with_AFandOR.csv")
 in_filename_gene_metadata = paste0(tolower(gene),  "_metadata.csv")
@ -94,6 +108,7 @@ gene_metadata <- read.csv(infile_gene_metadata
                      , header = T)
 cat("Dim:", dim(gene_metadata))

+
 # counting NAs in AF, OR cols: 
 if (identical(sum(is.na(my_df_u$or_mychisq))
              , sum(is.na(my_df_u$pval_fisher))
@ -230,9 +245,9 @@ if (identical(sum(is.na(merged_df3$or_kin))
 if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
  && identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
  && identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
-  cat('PASS: Indices match for mychisq and kin ors missing values')
+  cat("PASS: Indices match for mychisq and kin ors missing values")
 } else{
-  cat('Index mismatch: mychisq and kin ors missing indices match')
+  cat("Index mismatch: mychisq and kin ors missing indices match")
  quit()
 }

@ -245,7 +260,7 @@ cat("Merging dfs without any NAs: big df (1-many relationship b/w id & mut)"
    ,"\nfilename: merged_df2_comp")

 if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
-  print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
+  print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
  na_count_df2 = sum(is.na(merged_df2$af))
  merged_df2_comp = merged_df2[!is.na(merged_df2$af),] 
  # sanity check: no +-1 gymnastics
@ -262,7 +277,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
        ,"\nGot no. of rows: ", nrow(merged_df2_comp))
  }
 }else{
-  print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
+  print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
 }

 #=========================
@ -272,7 +287,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
 #=========================

 if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
-  print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
+  print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
  na_count_df3 = sum(is.na(merged_df3$af))
  #merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
  merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
@ -289,7 +304,7 @@ if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
        ,"\nGot no. of rows: ", nrow(merged_df3_comp))
   }
 } else{
-  print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
+  print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
 } 
  
 # alternate way of deriving merged_df3_comp
@ -347,7 +362,7 @@ merged_df3_comp_lig = merged_df3_comp[merged_df3_comp$ligand_distance<10,]
 if (nrow(merged_df3_lig) == nrow(my_df_u_lig)){
  print("PASS: verified merged_df3_lig")
 }else{
-  cat(paste0('FAIL: nrow mismatch for merged_df3_lig'
+  cat(paste0("FAIL: nrow mismatch for merged_df3_lig"
             , "\nExpected:", nrow(my_df_u_lig)
             , "\nGot:", nrow(merged_df3_lig)))
 }