saving work yet again to be extra sure
This commit is contained in:
parent
65841e4f5b
commit
cf732a3bcc
3 changed files with 7 additions and 25 deletions
|
@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin))
|
||||||
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||||
}
|
}
|
||||||
|
|
||||||
str(gene_metadata)
|
|
||||||
|
|
||||||
###################################################################
|
###################################################################
|
||||||
# combining: PS
|
# combining: PS
|
||||||
|
@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata))
|
||||||
|
|
||||||
cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
|
cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
|
||||||
, "\nNo. of merging cols:", length(merging_cols)
|
, "\nNo. of merging cols:", length(merging_cols)
|
||||||
, "\nMerging columns identified:"))
|
, "\nMerging columns identified:\n"))
|
||||||
print(merging_cols)
|
print(merging_cols)
|
||||||
|
|
||||||
# important checks!
|
# important checks!
|
||||||
|
@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata
|
||||||
, by = merging_cols
|
, by = merging_cols
|
||||||
, all.y = T)
|
, all.y = T)
|
||||||
|
|
||||||
cat("Dim of merged_df2: ", dim(merged_df2))
|
cat("Dim of merged_df2: ", dim(merged_df2), "\n")
|
||||||
head(merged_df2$position)
|
head(merged_df2$position)
|
||||||
|
|
||||||
# sanity check
|
# sanity check
|
||||||
|
@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){
|
||||||
,"\nExpected no. of rows: ",nrow(gene_metadata)
|
,"\nExpected no. of rows: ",nrow(gene_metadata)
|
||||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||||
} else{
|
} else{
|
||||||
cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||||
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
|
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
|
||||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||||
, "\nFinding discrepancy")
|
, "\nFinding discrepancy\n")
|
||||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||||
# find the index where it differs
|
# find the index where it differs
|
||||||
|
@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin))
|
||||||
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
||||||
}
|
}
|
||||||
|
|
||||||
# check that the same rows have missing ORs, AFs and p-values in both the mychisq and kin results
|
|
||||||
if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
|
|
||||||
&& identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
|
|
||||||
&& identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
|
|
||||||
cat("PASS: Indices match for mychisq and kin ors missing values")
|
|
||||||
} else{
|
|
||||||
cat("Index mismatch: mychisq and kin ors missing indices match")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
#=========================
|
#=========================
|
||||||
# Merge3: merged_df2_comp
|
# Merge3: merged_df2_comp
|
||||||
# same as merge 1 but excluding NAs from ORs, etc.
|
# same as merge 1 but excluding NAs from ORs, etc.
|
||||||
|
@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
|
||||||
na_count_df3 = sum(is.na(merged_df3$af))
|
na_count_df3 = sum(is.na(merged_df3$af))
|
||||||
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
|
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
|
||||||
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
|
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
|
||||||
cat("Checking nrows in merged_df3_comp")
|
cat("\nChecking nrows in merged_df3_comp")
|
||||||
if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
||||||
cat("\nPASS: No. of rows match"
|
cat("\nPASS: No. of rows match"
|
||||||
,"\nDim of merged_df3_comp: "
|
,"\nDim of merged_df3_comp: "
|
||||||
|
@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
||||||
, "\nNo. of rows: ", nrow(merged_df3_comp)
|
, "\nNo. of rows: ", nrow(merged_df3_comp)
|
||||||
, "\nNo. of cols: ", ncol(merged_df3_comp))
|
, "\nNo. of cols: ", ncol(merged_df3_comp))
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: No. of rows mismatch"
|
cat("\nFAIL: No. of rows mismatch"
|
||||||
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
|
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
|
||||||
, "\nGot no. of rows: ", nrow(merged_df3_comp))
|
, "\nGot no. of rows: ", nrow(merged_df3_comp))
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,13 +15,6 @@ library(data.table)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
source("combining_dfs_plotting.R")
|
source("combining_dfs_plotting.R")
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
#lineage_dist_combined = "lineage_dist_combined_PS.svg"
|
|
||||||
#plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined)
|
|
||||||
|
|
||||||
|
|
||||||
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||||
, merged_df3_comp, merged_df3_comp_lig
|
, merged_df3_comp, merged_df3_comp_lig
|
||||||
, my_df_u, my_df_u_lig)
|
, my_df_u, my_df_u_lig)
|
||||||
|
|
|
@ -97,7 +97,7 @@ table(my_df_u$ligand_distance<10)
|
||||||
|
|
||||||
my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
|
my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
|
||||||
angstroms_symbol = "\u212b"
|
angstroms_symbol = "\u212b"
|
||||||
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand"))
|
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of data extraction and cleaning for plots #
|
# end of data extraction and cleaning for plots #
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue