saving work yet again to be extra sure

This commit is contained in:
Tanushree Tunstall 2020-09-10 16:03:04 +01:00
parent 65841e4f5b
commit cf732a3bcc
3 changed files with 7 additions and 25 deletions

View file

@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin))
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
}
str(gene_metadata)
###################################################################
# combining: PS
@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata))
cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
, "\nNo. of merging cols:", length(merging_cols)
, "\nMerging columns identified:"))
, "\nMerging columns identified:\n"))
print(merging_cols)
# important checks!
@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata
, by = merging_cols
, all.y = T)
cat("Dim of merged_df2: ", dim(merged_df2))
cat("Dim of merged_df2: ", dim(merged_df2), "\n")
head(merged_df2$position)
# sanity check
@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){
,"\nExpected no. of rows: ",nrow(gene_metadata)
,"\nGot no. of rows: ", nrow(merged_df2))
} else{
cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
, "\nGot no. of rows: ", nrow(merged_df2)
, "\nFinding discrepancy")
, "\nFinding discrepancy\n")
merged_muts_u = unique(merged_df2$mutationinformation)
meta_muts_u = unique(gene_metadata$mutationinformation)
# find the index where it differs
@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin))
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
}
# check if the same ORs, AFs and p-values are missing (NA) in both the mychisq/fisher and kin columns
if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
&& identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
&& identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
cat("PASS: Indices match for mychisq and kin ors missing values")
} else{
cat("Index mismatch: mychisq and kin ors missing indices match")
quit()
}
#=========================
# Merge3: merged_df2_comp
# same as merge 1 but excluding NAs from ORs, etc.
@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
na_count_df3 = sum(is.na(merged_df3$af))
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
cat("Checking nrows in merged_df3_comp")
cat("\nChecking nrows in merged_df3_comp")
if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
cat("\nPASS: No. of rows match"
,"\nDim of merged_df3_comp: "
@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
, "\nNo. of rows: ", nrow(merged_df3_comp)
, "\nNo. of cols: ", ncol(merged_df3_comp))
}else{
cat("FAIL: No. of rows mismatch"
cat("\nFAIL: No. of rows mismatch"
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
, "\nGot no. of rows: ", nrow(merged_df3_comp))
}

View file

@ -15,13 +15,6 @@ library(data.table)
library(dplyr)
source("combining_dfs_plotting.R")
#=======
# output
#=======
#lineage_dist_combined = "lineage_dist_combined_PS.svg"
#plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined)
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
, merged_df3_comp, merged_df3_comp_lig
, my_df_u, my_df_u_lig)

View file

@ -97,7 +97,7 @@ table(my_df_u$ligand_distance<10)
my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
angstroms_symbol = "\u212b"
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand"))
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
########################################################################
# end of data extraction and cleaning for plots #