`resolved ambiguous muts and generated clean output. Also seaprated dir.R
This commit is contained in:
parent
46b43cf261
commit
b7c7ffc018
5 changed files with 49 additions and 23 deletions
|
@ -1170,7 +1170,7 @@ del(out_filename_metadata_poscounts)
|
||||||
|
|
||||||
#%% Write file: gene_metadata (i.e gene_LF1)
|
#%% Write file: gene_metadata (i.e gene_LF1)
|
||||||
# where each row has UNIQUE mutations NOT unique sample ids
|
# where each row has UNIQUE mutations NOT unique sample ids
|
||||||
out_filename_metadata = gene.lower() + '_metadata.csv'
|
out_filename_metadata = gene.lower() + '_metadata_raw.csv'
|
||||||
outfile_metadata = outdir + '/' + out_filename_metadata
|
outfile_metadata = outdir + '/' + out_filename_metadata
|
||||||
print('Writing file: LF formatted data'
|
print('Writing file: LF formatted data'
|
||||||
, '\nFile:', outfile_metadata
|
, '\nFile:', outfile_metadata
|
||||||
|
|
|
@ -156,6 +156,7 @@ other_muts = foo2[foo2$mutation_info == other_muts_col,]
|
||||||
|
|
||||||
common_muts = dr_muts[dr_muts$mutation%in%other_muts$mutation,]
|
common_muts = dr_muts[dr_muts$mutation%in%other_muts$mutation,]
|
||||||
#write.csv(common_muts, 'common_muts.csv')
|
#write.csv(common_muts, 'common_muts.csv')
|
||||||
|
rm(common_muts)
|
||||||
|
|
||||||
# FIXME read properly
|
# FIXME read properly
|
||||||
# "ambiguous_mut_names.csv"
|
# "ambiguous_mut_names.csv"
|
||||||
|
@ -165,11 +166,45 @@ ambiguous_muts_names = ambiguous_muts$mutation
|
||||||
|
|
||||||
common_muts_all = gene_metadata[gene_metadata$mutation%in%ambiguous_muts_names,]
|
common_muts_all = gene_metadata[gene_metadata$mutation%in%ambiguous_muts_names,]
|
||||||
|
|
||||||
gene_metadata2 = gene_metadata
|
|
||||||
|
|
||||||
if (gene_metadata$mutation_info[gene_metadata$mutation%in%ambiguous_muts_names] == other_muts_col){
|
if (gene_metadata$mutation_info[gene_metadata$mutation%in%ambiguous_muts_names] == other_muts_col){
|
||||||
print('change me')
|
print('change me')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# make a copy
|
||||||
|
gene_metadata2 = gene_metadata
|
||||||
|
table(gene_metadata$mutation_info)
|
||||||
|
count_check = as.data.frame(cbind(table(gene_metadata$mutationinformation, gene_metadata$mutation_info)))
|
||||||
|
#count_check$checks = ifelse(count_check$dr_mutations_pyrazinamide&&count_check$other_mutations_pyrazinamide>0, "ambi", "pass")
|
||||||
|
table(count_check$checks)
|
||||||
|
|
||||||
|
|
||||||
|
poo = c("V180F", "G132A", "D49G")
|
||||||
|
poo2 = count_check[rownames(count_check)%in%poo,]
|
||||||
|
poo2[[dr_muts_col]]&& poo2[[other_muts_col]]>0
|
||||||
|
poo2$checks = ifelse(poo2$checkspoo2[[dr_muts_col]]&& poo2[[other_muts_col]]>0, "ambi", "pass")
|
||||||
|
|
||||||
|
# remove common_muts_all
|
||||||
|
ids = gene_metadata$mutation%in%common_muts_all$mutation; table(ids)
|
||||||
|
gene_metadata_unambiguous = gene_metadata2[!ids,]
|
||||||
|
|
||||||
|
# sanity checks: should be true
|
||||||
|
table(gene_metadata_unambiguous$mutation%in%common_muts_all$mutation)[[1]] == nrow(gene_metadata_unambiguous)
|
||||||
|
nrow(gene_metadata_unambiguous) + nrow(common_muts_all) == nrow(gene_metadata)
|
||||||
|
|
||||||
|
# correct common muts
|
||||||
|
table(common_muts_all$mutation_info)
|
||||||
|
common_muts_all$mutation_info = as.factor(common_muts_all$mutation_info)
|
||||||
|
|
||||||
|
# change the other_muts to dr_muts
|
||||||
|
common_muts_all$mutation_info[common_muts_all$mutation_info==other_muts_col] <- dr_muts_col
|
||||||
|
|
||||||
|
table(common_muts_all$mutation_info)
|
||||||
|
common_muts_all$mutation_info = factor(common_muts_all$mutation_info)
|
||||||
|
table(common_muts_all$mutation_info)
|
||||||
|
|
||||||
|
# add it back to
|
||||||
|
gene_meta_data
|
||||||
|
|
||||||
###################################################################
|
###################################################################
|
||||||
# combining: PS
|
# combining: PS
|
||||||
###################################################################
|
###################################################################
|
||||||
|
|
|
@ -39,6 +39,7 @@ rm(my_df, upos, dup_muts)
|
||||||
#in_file1: output of plotting_data.R
|
#in_file1: output of plotting_data.R
|
||||||
# my_df_u
|
# my_df_u
|
||||||
|
|
||||||
|
#===========
|
||||||
# output
|
# output
|
||||||
#===========
|
#===========
|
||||||
# mutations with opposite effects
|
# mutations with opposite effects
|
||||||
|
|
|
@ -15,7 +15,10 @@ library(ggplot2)
|
||||||
library(data.table)
|
library(data.table)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
|
|
||||||
|
source("dirs.R")
|
||||||
|
|
||||||
require("getopt", quietly = TRUE) #cmd parse arguments
|
require("getopt", quietly = TRUE) #cmd parse arguments
|
||||||
|
|
||||||
#========================================================
|
#========================================================
|
||||||
# command line args
|
# command line args
|
||||||
#spec = matrix(c(
|
#spec = matrix(c(
|
||||||
|
@ -31,19 +34,6 @@ require("getopt", quietly = TRUE) #cmd parse arguments
|
||||||
# stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
|
# stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
|
||||||
#}
|
#}
|
||||||
#========================================================
|
#========================================================
|
||||||
#%% variable assignment: input and output paths & filenames
|
|
||||||
drug = "pyrazinamide"
|
|
||||||
gene = "pncA"
|
|
||||||
gene_match = paste0(gene,"_p.")
|
|
||||||
cat(gene_match)
|
|
||||||
|
|
||||||
#=============
|
|
||||||
# directories
|
|
||||||
#=============
|
|
||||||
datadir = paste0("~/git/Data")
|
|
||||||
indir = paste0(datadir, "/", drug, "/input")
|
|
||||||
outdir = paste0("~/git/Data", "/", drug, "/output")
|
|
||||||
plotdir = paste0("~/git/Data", "/", drug, "/output/plots")
|
|
||||||
#======
|
#======
|
||||||
# input
|
# input
|
||||||
#======
|
#======
|
||||||
|
@ -53,15 +43,14 @@ infile_params = paste0(outdir, "/", in_filename_params)
|
||||||
cat(paste0("Input file 1:", infile_params) )
|
cat(paste0("Input file 1:", infile_params) )
|
||||||
|
|
||||||
|
|
||||||
dr_muts_col = paste0('dr_mutations_', drug)
|
cat('columns based on variables:\n'
|
||||||
dr_muts_col = paste0('other_mutations_', drug)
|
|
||||||
|
|
||||||
cat('Extracting columns based on variables:\n'
|
|
||||||
, drug
|
, drug
|
||||||
, '\n'
|
, '\n'
|
||||||
, dr_muts_col
|
, dr_muts_col
|
||||||
, '\n'
|
, '\n'
|
||||||
, other_muts_col
|
, other_muts_col
|
||||||
|
, "\n"
|
||||||
|
, resistance_col
|
||||||
, '\n===============================================================')
|
, '\n===============================================================')
|
||||||
|
|
||||||
#%%===============================================================
|
#%%===============================================================
|
||||||
|
@ -74,9 +63,6 @@ my_df = read.csv(infile_params, header = T)
|
||||||
|
|
||||||
cat("\nInput dimensions:", dim(my_df))
|
cat("\nInput dimensions:", dim(my_df))
|
||||||
|
|
||||||
# quick checks
|
|
||||||
#colnames(my_df)
|
|
||||||
#str(my_df)
|
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
# extract unique mutation entries
|
# extract unique mutation entries
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
#========
|
#========
|
||||||
# plotting
|
# plotting
|
||||||
#========
|
#========
|
||||||
|
FIXME:
|
||||||
|
|
||||||
|
#dirs.R: imports dir structure
|
||||||
|
change this to cmd so you can pass in drug and gene name
|
||||||
|
|
||||||
#=======================
|
#=======================
|
||||||
plotting_data.R
|
plotting_data.R
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue