various changes

This commit is contained in:
Tanushree Tunstall 2020-09-08 17:13:02 +01:00
parent 5d9561f88a
commit fe49a45447
3 changed files with 199 additions and 95 deletions

View file

@ -58,6 +58,20 @@ rm(my_df, upos, dup_muts)
#in_file1: output of plotting_data.R
# my_df_u
# quick checks
# peek at the two mutation columns of my_df_u
head(my_df_u[, c("mutation", "mutation2")])

# keep only the mutation, OR and AF columns for side-by-side inspection
cols_to_extract = c("mutationinformation", "mutation", "or_mychisq", "or_kin", "af", "af_kin")
foo = my_df_u[, colnames(my_df_u) %in% cols_to_extract]

# NA positions of af_kin and af are expected to coincide.
# identical() returns a single TRUE/FALSE; comparing the two which() vectors
# with == recycles (or warns) when the NA counts differ and yields a vector
# rather than one verdict.
identical(which(is.na(my_df_u$af_kin)), which(is.na(my_df_u$af)))

# mutation/OR columns of my_df_u next to those of bar
# NOTE(review): bar is defined outside this excerpt; cbind() assumes both
# data frames have the same number of rows in the same order -- confirm
baz = cbind(my_df_u$mutation, my_df_u$or_mychisq, bar$mutation, bar$or_mychisq)
colnames(baz) = c("my_df_u_muts", "my_df_u_or", "real_muts", "real_or")
# infile 2: gene associated meta data
# The file name is the lower-cased value of `gene` followed by "_metadata.csv".
# The commented-out line below is an alternative file name
# ("_meta_data_with_AFandOR.csv") that is currently not used.
#in_filename_gene_metadata = paste0(tolower(gene), "_meta_data_with_AFandOR.csv")
in_filename_gene_metadata = paste0(tolower(gene), "_metadata.csv")
@ -94,6 +108,7 @@ gene_metadata <- read.csv(infile_gene_metadata
, header = T)
cat("Dim:", dim(gene_metadata))
# counting NAs in AF, OR cols:
if (identical(sum(is.na(my_df_u$or_mychisq))
, sum(is.na(my_df_u$pval_fisher))
@ -230,9 +245,9 @@ if (identical(sum(is.na(merged_df3$or_kin))
# Sanity check on merged_df2: the rows with missing values must be the same
# for each mychisq/fisher column and its kinship counterpart
# (or_mychisq vs or_kin, af vs af_kin, pval_fisher vs pwald_kin).
# On a mismatch the script terminates with a non-zero exit status so that a
# calling shell or pipeline can detect the failure.
if ( identical( which(is.na(merged_df2$or_mychisq)), which(is.na(merged_df2$or_kin)))
     && identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin)))
     && identical( which(is.na(merged_df2$pval_fisher)), which(is.na(merged_df2$pwald_kin))) ){
  cat("PASS: Indices match for mychisq and kin ors missing values")
} else{
  # message previously ended in "missing indices match", contradicting the branch
  cat("FAIL: Index mismatch for mychisq and kin ors missing values\n")
  quit(status = 1)
}
@ -245,7 +260,7 @@ cat("Merging dfs without any NAs: big df (1-many relationship b/w id & mut)"
,"\nfilename: merged_df2_comp")
if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
na_count_df2 = sum(is.na(merged_df2$af))
merged_df2_comp = merged_df2[!is.na(merged_df2$af),]
# sanity check: no +-1 gymnastics
@ -262,7 +277,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
,"\nGot no. of rows: ", nrow(merged_df2_comp))
}
}else{
print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
}
#=========================
@ -272,7 +287,7 @@ if ( identical( which(is.na(merged_df2$af)), which(is.na(merged_df2$af_kin))) ){
#=========================
if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
print('mychisq and kin ors missing indices match. Procedding with omitting NAs')
print("mychisq and kin ors missing indices match. Procedding with omitting NAs")
na_count_df3 = sum(is.na(merged_df3$af))
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
@ -289,7 +304,7 @@ if ( identical( which(is.na(merged_df3$af)), which(is.na(merged_df3$af_kin))) ){
,"\nGot no. of rows: ", nrow(merged_df3_comp))
}
} else{
print('Index mismatch for mychisq and kin ors. Aborting NA ommission')
print("Index mismatch for mychisq and kin ors. Aborting NA ommission")
}
# alternate way of deriving merged_df3_comp
@ -347,7 +362,7 @@ merged_df3_comp_lig = merged_df3_comp[merged_df3_comp$ligand_distance<10,]
# Sanity check: merged_df3_lig must have the same number of rows as
# my_df_u_lig; otherwise report the expected and observed row counts.
if (nrow(merged_df3_lig) == nrow(my_df_u_lig)){
  print("PASS: verified merged_df3_lig")
}else{
  # single cat(paste0(...)) call: a duplicated opening line would leave the
  # parentheses unbalanced
  cat(paste0("FAIL: nrow mismatch for merged_df3_lig"
             , "\nExpected:", nrow(my_df_u_lig)
             , "\nGot:", nrow(merged_df3_lig)))
}