Added an additional check in combining_df_plotting.R for the row-count sanity check performed when generating merged_df2: mutations NOT present in mcsm can cause a mismatch with the metadata, so the check now accounts for them. Ran it successfully for alr and katg.
This commit is contained in:
parent
8f8a9db92c
commit
e2cdee2d08
4 changed files with 62 additions and 30 deletions
|
@ -53,20 +53,20 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, "\nNA in pvalue: ", sum(is.na(my_df_u$pval_fisher))
|
||||
, "\nNA in AF:", sum(is.na(my_df_u$af)))
|
||||
}
|
||||
|
||||
# or kin
|
||||
if (identical(sum(is.na(my_df_u$or_kin))
|
||||
, sum(is.na(my_df_u$pwald_kin))
|
||||
, sum(is.na(my_df_u$af_kin)))){
|
||||
cat("\nPASS: NA count match for OR, pvalue and AF\n from Kinship matrix calculations")
|
||||
na_count = sum(is.na(my_df_u$af_kin))
|
||||
cat("\nNo. of NAs: ", sum(is.na(my_df_u$or_kin)))
|
||||
} else{
|
||||
cat("\nFAIL: NA count mismatch"
|
||||
, "\nNA in OR: ", sum(is.na(my_df_u$or_kin))
|
||||
, "\nNA in pvalue: ", sum(is.na(my_df_u$pwald_kin))
|
||||
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||
}
|
||||
#
|
||||
# # or kin
|
||||
# if (identical(sum(is.na(my_df_u$or_kin))
|
||||
# , sum(is.na(my_df_u$pwald_kin))
|
||||
# , sum(is.na(my_df_u$af_kin)))){
|
||||
# cat("\nPASS: NA count match for OR, pvalue and AF\n from Kinship matrix calculations")
|
||||
# na_count = sum(is.na(my_df_u$af_kin))
|
||||
# cat("\nNo. of NAs: ", sum(is.na(my_df_u$or_kin)))
|
||||
# } else{
|
||||
# cat("\nFAIL: NA count mismatch"
|
||||
# , "\nNA in OR: ", sum(is.na(my_df_u$or_kin))
|
||||
# , "\nNA in pvalue: ", sum(is.na(my_df_u$pwald_kin))
|
||||
# , "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||
# }
|
||||
|
||||
str(gene_metadata)
|
||||
|
||||
|
@ -98,7 +98,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# merging_cols = merging_cols[[1]]
|
||||
merging_cols = 'mutationinformation'
|
||||
|
||||
cat("\nLinking column being used: mutationinformation")
|
||||
cat("\nLinking column being used:", merging_cols)
|
||||
|
||||
# important checks!
|
||||
table(nchar(my_df_u$mutationinformation))
|
||||
|
@ -111,6 +111,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, y = my_df_u
|
||||
, by = merging_cols
|
||||
, all.y = T)
|
||||
#, all.x = T)
|
||||
|
||||
cat("\nDim of merged_df2: ", dim(merged_df2))
|
||||
|
||||
|
@ -138,6 +139,17 @@ combining_dfs_plotting <- function( my_df_u
|
|||
|
||||
head(merged_df2$position)
|
||||
|
||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||
# find the index where it differs
|
||||
cat("\nLength of unique mcsm_muts:", length(merged_muts_u)
|
||||
, "\nLength of unique meta muts:",length(meta_muts_u) )
|
||||
|
||||
meta_muts_all = gene_metadata$mutationinformation
|
||||
merged_muts = merged_df2$mutationinformation
|
||||
discrepancy_uniq = unique(meta_muts_u[! meta_muts_u %in% merged_muts_u])
|
||||
discrepancy = meta_muts_all[! meta_muts_all %in% merged_muts]
|
||||
|
||||
# sanity check
|
||||
cat("\nChecking nrows in merged_df2")
|
||||
if(nrow(gene_metadata) == nrow(merged_df2)){
|
||||
|
@ -145,16 +157,36 @@ combining_dfs_plotting <- function( my_df_u
|
|||
,"\nExpected no. of rows: ",nrow(gene_metadata)
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
} else{
|
||||
cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
cat("\nWARNING: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nFinding discrepancy")
|
||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||
# find the index where it differs
|
||||
unique(meta_muts_u[! meta_muts_u %in% merged_muts_u])
|
||||
quit()
|
||||
cat("\nLength of unique mcsm_muts:", length(merged_muts_u)
|
||||
, "\nLength of unique meta muts:",length(meta_muts_u)
|
||||
, "\nLength of unique muts in meta muts NOT in mcsm muts:", length(discrepancy_uniq)
|
||||
, "These correspond to:", discrepancy, "entries"
|
||||
, "\nThese problematic muts are:\n"
|
||||
, discrepancy_uniq)
|
||||
#quit()
|
||||
cat("\nChecking again...")
|
||||
expected_nrows_df2 = nrow(gene_metadata) - length(discrepancy)
|
||||
if (nrow(merged_df2) == expected_nrows_df2){
|
||||
cat("\nPASS: nrow(merged_df2) is as expected after accounting for discrepancy"
|
||||
,"\nExpected no. of rows: ", expected_nrows_df2
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
}else{ cat("\nFAIL: nrow(merged_df2) is NOT as expected even after accounting for discrepancy"
|
||||
, "\nExpected no. of rows after merge: ", expected_nrows_df2
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nQuitting!")
|
||||
quit()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
# Quick formatting: ordering df and pretty labels
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue