#!/usr/bin/env Rscript
#########################################################
# TASK: To calculate Allele Frequency and
# Odds Ratio from master data
#
# Command-line driver: parses the options below, fills in
# defaults for anything not supplied, reports the final
# parameter set and hands everything to my_afor().
#
# Mandatory options : --drug, --gene
# Optional options  : --master_data, --gene_data, --outfile,
#                     --idcol, --drmuts_col, --othermuts_col
#########################################################

# working dir
# NOTE: the relative source() calls below depend on this directory.
setwd("~/git/LSHTM_analysis/scripts")
getwd()

# load libraries
#source("Header_TT.R")
# library() (not require()) so that a missing package stops the script here
# instead of failing later with "could not find function getopt".
library("getopt", quietly = TRUE) # cmd parse arguments

# load functions
source("functions/plotting_globals.R")
source("functions/mychisq_or.R")
source("functions/myaf_or_calcs.R")

#############################################################
# command line args
#********************
# Columns: long name, short flag, argument mask, data type
# (mask 1 = required argument, 2 = optional argument).
spec <- matrix(c(
  "drug"          , "d", 1, "character",
  "gene"          , "g", 1, "character",
  "master_data"   , "m", 2, "character",
  "gene_data"     , "G", 2, "character",
  "outfile"       , "o", 2, "character",
  "idcol"         , "I", 2, "character",
  "drmuts_col"    , "D", 2, "character",
  "othermuts_col" , "O", 2, "character"
), byrow = TRUE, ncol = 4)

opt <- getopt(spec)

# Extract with [[ ]] (exact matching). `$` partial-matches on lists, so
# opt$gene would silently return the value of --gene_data when --gene itself
# was not supplied, defeating the mandatory-argument check below.
drug               <- opt[["drug"]]
gene               <- opt[["gene"]]
infile_master      <- opt[["master_data"]]
infile_metadata    <- opt[["gene_data"]]
outfile            <- opt[["outfile"]]
idcol              <- opt[["idcol"]]
cli_dr_muts_col    <- opt[["drmuts_col"]]
cli_other_muts_col <- opt[["othermuts_col"]]

if (is.null(drug) || is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}

# import_dirs()
# Defined in one of the sourced files; the code below relies on it providing
# `datadir` and `outdir`.
import_dirs(drug, gene)

# setting sensible defaults for opt args

#----------------------------
# input file 1: master data
#----------------------------
if (is.null(infile_master)) {
  #in_filename_master = 'original_tanushree_data_v2.csv' #19K
  in_filename_master <- 'mtb_gwas_meta_v6.csv' #35k
  # NOTE(review): no path separator is inserted here, so `datadir` is presumed
  # to end with "/" -- confirm against import_dirs().
  infile_master <- paste0(datadir, in_filename_master)
  cat("\nInput file 1 not specified, assuming filename: ", infile_master)
  cat(paste0("\nReading infile 1 i.e raw data: ", infile_master))
}

#---------------------------------------------------
# input file 2: gene associated meta
# data file to extract valid snps and add calcs to.
#---------------------------------------------------
if (is.null(infile_metadata)) {
  # This is outfile_metadata from data_extraction.py
  in_filename_metadata <- paste0(tolower(gene), '_metadata.csv')
  infile_metadata <- file.path(outdir, in_filename_metadata)
  cat("\nInput file 2 not specified, assuming filename: ", infile_metadata)
  cat(paste0("\nReading infile 2 i.e gene associated metadata:", infile_metadata))
}

#-------------------------------------------
# outfile: csv file containing AF and OR
#-------------------------------------------
if (is.null(outfile)) {
  # out_filename_af_or = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
  out_filename_af_or <- paste0(tolower(gene), '_af_or.csv')
  outfile <- file.path(outdir, out_filename_af_or)
  cat("\nOutfile not specified, assuming filename: ", outfile)
  cat(paste0('\nOutput file with full path:', outfile))
}

#-------------------------------------------
# idcol: column name "id"
#-------------------------------------------
if (is.null(idcol)) {
  idcol <- "id"
}

#-------------------------------------------
# dr-and-others muts cols: comes from plotting_globals.R
# colnames that can be constructed using drug
# (dr_mutations_), (other_mutations_)
#
# Resolution order for each column name:
#   1. value given on the command line
#   2. a non-NULL global of the same name, if the sourced files /
#      import_dirs() defined one
#   3. the prefix above pasted with the drug name
#      (NOTE(review): fallback naming taken from the comment above --
#       confirm it matches the columns in the master data)
# Resolving AFTER import_dirs() ensures a command-line value is never
# overwritten by globals assigned there.
#-------------------------------------------
if (!is.null(cli_dr_muts_col)) {
  dr_muts_col <- cli_dr_muts_col
} else {
  if (!exists("dr_muts_col") || is.null(dr_muts_col)) {
    dr_muts_col <- paste0("dr_mutations_", drug)
  }
  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
      , dr_muts_col, "\n")
}

if (!is.null(cli_other_muts_col)) {
  other_muts_col <- cli_other_muts_col
} else {
  if (!exists("other_muts_col") || is.null(other_muts_col)) {
    other_muts_col <- paste0("other_mutations_", drug)
  }
  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
      , other_muts_col, "\n")
}

# Informing the user of the sensible defaults being used:
cat("======================"
    , "\nParameters passed:"
    , "\n======================"
    , "\nDRUG name: ", drug, "\n"
    , "\nGENE name: ", gene, "\n"
    , "\nReading infile 1 i.e raw data: ", infile_master, "\n"
    , "\nReading infile 2 i.e gene associated metadata:", infile_metadata, "\n"
    , '\nOutput file with full path:', outfile, "\n"
    , "\nColumn name of id:", idcol, "\n"
    , "\ndr mutation colname:", dr_muts_col, "\n"
    , "\nother mutation colname:", other_muts_col, "\n")

#=======================================================================
#============================
# call function: my_afor()
#=============================
# Arguments are passed positionally, in the order the sourced definition of
# my_afor() is expected to take them.
my_afor(
  drug
  , gene
  , infile_master
  , infile_metadata
  , outfile
  , idcol
  , dr_muts_col
  , other_muts_col
)
#=======================================================================