various changes
This commit is contained in:
parent
5d9561f88a
commit
fe49a45447
3 changed files with 199 additions and 95 deletions
|
@ -58,6 +58,20 @@ rm(my_df, upos, dup_muts)
|
|||
# in_file1: output of plotting_data.R
|
||||
# my_df_u
|
||||
|
||||
# quick checks
|
||||
head(my_df_u[, c("mutation", "mutation2")])
|
||||
|
||||
cols_to_extract = c("mutationinformation", "mutation", "or_mychisq", "or_kin", "af", "af_kin")
|
||||
foo = my_df_u[, colnames(my_df_u)%in% cols_to_extract]
|
||||
|
||||
|
||||
which(is.na(my_df_u$af_kin)) == which(is.na(my_df_u$af))
|
||||
|
||||
|
||||
baz = cbind(my_df_u$mutation, my_df_u$or_mychisq, bar$mutation, bar$or_mychisq)
|
||||
colnames(baz) = c("my_df_u_muts", "my_df_u_or", "real_muts", "real_or")
|
||||
|
||||
|
||||
# in_file2: gene-associated metadata
|
||||
#in_filename_gene_metadata = paste0(tolower(gene), "_meta_data_with_AFandOR.csv")
|
||||
in_filename_gene_metadata = paste0(tolower(gene), "_metadata.csv")
|
||||
|
@ -94,6 +108,7 @@ gene_metadata <- read.csv(infile_gene_metadata
|
|||
, header = T)
|
||||
cat("Dim:", dim(gene_metadata))
|
||||
|
||||
|
||||
# counting NAs in AF, OR cols:
|
||||
if (identical(sum(is.na(my_df_u$or_mychisq))
|
||||
, sum(is.na(my_df_u$pval_fisher))
|
||||
|
@ -230,9 +245,9 @@ if (identical(sum(is.na(merged_df3$or_kin))
|
|||
if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
|
||||
&& identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
|
||||
&& identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
|
||||
cat('PASS: Indices match for mychisq and kin ors missing values')
|
||||
cat("PASS: Indices match for mychisq and kin ors missing values")
|
||||
} else{
|
||||
cat('Index mismatch: mychisq and kin ors missing indices match')
|
||||
cat("Index mismatch: mychisq and kin ors missing indices match")
|
||||
quit()
|
||||
}
|
||||
|
||||
|
@ -245,7 +260,7 @@ cat("Merging dfs without any NAs: big df (1-many relationship b/w id & mut)"
|
|||
,"\nfilename: merged_df2_comp")
|
||||
|
||||
if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
|
||||
print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
|
||||
print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
|
||||
na_count_df2 = sum(is.na(merged_df2$af))
|
||||
merged_df2_comp = merged_df2[!is.na(merged_df2$af),]
|
||||
# sanity check: no +-1 gymnastics
|
||||
|
@ -262,7 +277,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
|
|||
,"\nGot no. of rows: ", nrow(merged_df2_comp))
|
||||
}
|
||||
}else{
|
||||
print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
|
||||
print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
|
||||
}
|
||||
|
||||
#=========================
|
||||
|
@ -272,7 +287,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
|
|||
#=========================
|
||||
|
||||
if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
|
||||
print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
|
||||
print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
|
||||
na_count_df3 = sum(is.na(merged_df3$af))
|
||||
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
|
||||
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
|
||||
|
@ -289,7 +304,7 @@ if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
|
|||
,"\nGot no. of rows: ", nrow(merged_df3_comp))
|
||||
}
|
||||
} else{
|
||||
print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
|
||||
print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
|
||||
}
|
||||
|
||||
# alternate way of deriving merged_df3_comp
|
||||
|
@ -347,7 +362,7 @@ merged_df3_comp_lig = merged_df3_comp[merged_df3_comp$ligand_distance<10,]
|
|||
if (nrow(merged_df3_lig) == nrow(my_df_u_lig)){
|
||||
print("PASS: verified merged_df3_lig")
|
||||
}else{
|
||||
cat(paste0('FAIL: nrow mismatch for merged_df3_lig'
|
||||
cat(paste0("FAIL: nrow mismatch for merged_df3_lig"
|
||||
, "\nExpected:", nrow(my_df_u_lig)
|
||||
, "\nGot:", nrow(merged_df3_lig)))
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue