saving work yet again to be extra sure
This commit is contained in:
parent
01273a8184
commit
8874f9911f
3 changed files with 7 additions and 25 deletions
|
@ -124,7 +124,6 @@ if (identical(sum(is.na(my_df_u$or_kin))
|
|||
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||
}
|
||||
|
||||
str(gene_metadata)
|
||||
|
||||
###################################################################
|
||||
# combining: PS
|
||||
|
@ -146,7 +145,7 @@ merging_cols = intersect(colnames(my_df_u), colnames(gene_metadata))
|
|||
|
||||
cat(paste0("Merging dfs with NAs: big df (1-many relationship b/w id & mut)"
|
||||
, "\nNo. of merging cols:", length(merging_cols)
|
||||
, "\nMerging columns identified:"))
|
||||
, "\nMerging columns identified:\n"))
|
||||
print(merging_cols)
|
||||
|
||||
# important checks!
|
||||
|
@ -161,7 +160,7 @@ merged_df2 = merge(x = gene_metadata
|
|||
, by = merging_cols
|
||||
, all.y = T)
|
||||
|
||||
cat("Dim of merged_df2: ", dim(merged_df2))
|
||||
cat("Dim of merged_df2: ", dim(merged_df2), "\n")
|
||||
head(merged_df2$position)
|
||||
|
||||
# sanity check
|
||||
|
@ -171,10 +170,10 @@ if(nrow(gene_metadata) == nrow(merged_df2)){
|
|||
,"\nExpected no. of rows: ",nrow(gene_metadata)
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
} else{
|
||||
cat("FAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nFinding discrepancy")
|
||||
, "\nFinding discrepancy\n")
|
||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||
# find the index where it differs
|
||||
|
@ -228,16 +227,6 @@ if (identical(sum(is.na(merged_df3$or_kin))
|
|||
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
||||
}
|
||||
|
||||
# check if the same ORs, AFs and p-values are missing (NA at the same indices) for both the mychisq and kin estimates
|
||||
if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
|
||||
&& identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
|
||||
&& identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
|
||||
cat("PASS: Indices match for mychisq and kin ors missing values")
|
||||
} else{
|
||||
cat("Index mismatch: mychisq and kin ors missing indices match")
|
||||
quit()
|
||||
}
|
||||
|
||||
#=========================
|
||||
# Merge3: merged_df2_comp
|
||||
# same as merge 1 but excluding NAs from ORs, etc.
|
||||
|
@ -270,7 +259,7 @@ if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
|
|||
na_count_df3 = sum(is.na(merged_df3$af))
|
||||
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
|
||||
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
|
||||
cat("Checking nrows in merged_df3_comp")
|
||||
cat("\nChecking nrows in merged_df3_comp")
|
||||
if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
||||
cat("\nPASS: No. of rows match"
|
||||
,"\nDim of merged_df3_comp: "
|
||||
|
@ -278,7 +267,7 @@ if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
|||
, "\nNo. of rows: ", nrow(merged_df3_comp)
|
||||
, "\nNo. of cols: ", ncol(merged_df3_comp))
|
||||
}else{
|
||||
cat("FAIL: No. of rows mismatch"
|
||||
cat("\nFAIL: No. of rows mismatch"
|
||||
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
|
||||
, "\nGot no. of rows: ", nrow(merged_df3_comp))
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue