resolved ambiguous muts and generated clean output. Also separated dir.R

This commit is contained in:
Tanushree Tunstall 2020-09-09 11:26:13 +01:00
parent 46b43cf261
commit b7c7ffc018
5 changed files with 49 additions and 23 deletions

View file

@ -156,6 +156,7 @@ other_muts = foo2[foo2$mutation_info == other_muts_col,]
# Rows of dr_muts whose mutation also occurs in other_muts; kept only long
# enough to be inspected (or written out via the commented line), then dropped.
common_muts <- dr_muts[dr_muts$mutation %in% other_muts$mutation, ]
#write.csv(common_muts, 'common_muts.csv')
rm(list = "common_muts")
# FIXME read properly
# "ambiguous_mut_names.csv"
@ -165,11 +166,45 @@ ambiguous_muts_names = ambiguous_muts$mutation
# Rows of gene_metadata whose mutation is listed in ambiguous_muts_names.
common_muts_all <- gene_metadata[gene_metadata$mutation %in% ambiguous_muts_names, ]

# Report whether any of those rows is labelled other_muts_col.
# The comparison yields one value per row, so it is collapsed with any():
# if() requires a single TRUE/FALSE (a longer condition is an error since
# R 4.2.0, and a zero-length one has always been an error).
if (any(common_muts_all$mutation_info == other_muts_col, na.rm = TRUE)) {
  print('change me')
}

# make a copy
gene_metadata2 <- gene_metadata
# Row counts per mutation_info category.
table(gene_metadata$mutation_info)

# Wide count table: one row per mutationinformation value, one column per
# mutation_info category.
count_check <- as.data.frame(cbind(table(gene_metadata$mutationinformation, gene_metadata$mutation_info)))

# Label each row "ambi" when it has counts under both dr_muts_col and
# other_muts_col, otherwise "pass". The test is elementwise (&), because the
# columns hold one count per row; scalar && rejects such vectors in R >= 4.3.
# NOTE(review): assumes the column names of count_check are the values held in
# dr_muts_col / other_muts_col - confirm against the data.
# The labels are kept in a separate vector so count_check itself stays a pure
# count table.
count_check_labels <- ifelse(
  count_check[[dr_muts_col]] > 0 & count_check[[other_muts_col]] > 0,
  "ambi", "pass"
)
table(count_check_labels)

# Spot-check three hand-picked rows.
poo <- c("V180F", "G132A", "D49G")
poo2 <- count_check[rownames(count_check) %in% poo, ]
poo2[[dr_muts_col]] > 0 & poo2[[other_muts_col]] > 0
poo2$checks <- ifelse(
  poo2[[dr_muts_col]] > 0 & poo2[[other_muts_col]] > 0,
  "ambi", "pass"
)
# Drop every row whose mutation appears in common_muts_all.
# ids marks those rows (TRUE = to be removed); its tally is printed first.
ids <- gene_metadata$mutation %in% common_muts_all$mutation
table(ids)
gene_metadata_unambiguous <- gene_metadata2[!ids, ]

# Sanity checks, both expected to print TRUE:
# 1) the first cell of the membership tally accounts for every remaining row
nrow(gene_metadata_unambiguous) ==
  table(gene_metadata_unambiguous$mutation %in% common_muts_all$mutation)[[1]]
# 2) the kept rows plus the removed rows add up to the original row count
nrow(gene_metadata) ==
  nrow(gene_metadata_unambiguous) + nrow(common_muts_all)
# correct common muts
# Relabel every other_muts_col row of common_muts_all as dr_muts_col.

# Category counts before the change.
table(common_muts_all$mutation_info)

# Do the replacement on character values rather than on a factor: assigning a
# value that is not already a factor level silently produces NA.
# %in% is used instead of == so that NA labels are left untouched.
common_muts_all$mutation_info <- as.character(common_muts_all$mutation_info)
common_muts_all$mutation_info[common_muts_all$mutation_info %in% other_muts_col] <- dr_muts_col

# Rebuild the factor so that only the labels still present remain as levels.
common_muts_all$mutation_info <- factor(common_muts_all$mutation_info)

# Category counts after the change.
table(common_muts_all$mutation_info)

# TODO: add it back to gene_meta_data
# NOTE(review): the original had a bare `gene_meta_data` on its own line here.
# No object of that name is visible in this part of the script (the data frame
# used above is `gene_metadata`), so evaluating it would stop the script if it
# is undefined. The recombination step still needs to be written.
###################################################################
# combining: PS
###################################################################